comparison NGSrich_0.5.5/src/org/jdom/input/SAXHandler.java @ 0:89ad0a9cca52 default tip

Uploaded
author pfrommolt
date Mon, 21 Nov 2011 08:12:19 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:89ad0a9cca52
1 /*--
2
3 $Id: SAXHandler.java,v 1.73 2007/11/10 05:29:00 jhunter Exp $
4
5 Copyright (C) 2000-2007 Jason Hunter & Brett McLaughlin.
6 All rights reserved.
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions
10 are met:
11
12 1. Redistributions of source code must retain the above copyright
13 notice, this list of conditions, and the following disclaimer.
14
15 2. Redistributions in binary form must reproduce the above copyright
16 notice, this list of conditions, and the disclaimer that follows
17 these conditions in the documentation and/or other materials
18 provided with the distribution.
19
20 3. The name "JDOM" must not be used to endorse or promote products
21 derived from this software without prior written permission. For
22 written permission, please contact <request_AT_jdom_DOT_org>.
23
24 4. Products derived from this software may not be called "JDOM", nor
25 may "JDOM" appear in their name, without prior written permission
26 from the JDOM Project Management <request_AT_jdom_DOT_org>.
27
28 In addition, we request (but do not require) that you include in the
29 end-user documentation provided with the redistribution and/or in the
30 software itself an acknowledgement equivalent to the following:
31 "This product includes software developed by the
32 JDOM Project (http://www.jdom.org/)."
33 Alternatively, the acknowledgment may be graphical using the logos
34 available at http://www.jdom.org/images/logos.
35
36 THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39 DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
40 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47 SUCH DAMAGE.
48
49 This software consists of voluntary contributions made by many
50 individuals on behalf of the JDOM Project and was originally
51 created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
52 Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information
53 on the JDOM Project, please see <http://www.jdom.org/>.
54
55 */
56
57 package org.jdom.input;
58
59 import java.util.*;
60
61 import org.jdom.*;
62 import org.xml.sax.*;
63 import org.xml.sax.ext.*;
64 import org.xml.sax.helpers.*;
65
66 /**
67 * A support class for {@link SAXBuilder}.
68 *
69 * @version $Revision: 1.73 $, $Date: 2007/11/10 05:29:00 $
70 * @author Brett McLaughlin
71 * @author Jason Hunter
72 * @author Philip Nelson
73 * @author Bradley S. Huffman
74 * @author phil@triloggroup.com
75 */
76 public class SAXHandler extends DefaultHandler implements LexicalHandler,
77 DeclHandler,
78 DTDHandler {
79
80 private static final String CVS_ID =
81 "@(#) $RCSfile: SAXHandler.java,v $ $Revision: 1.73 $ $Date: 2007/11/10 05:29:00 $ $Name: jdom_1_1_1 $";
82
83 /** Hash table to map SAX attribute type names to JDOM attribute types. */
84 private static final Map attrNameToTypeMap = new HashMap(13);
85
86 /** <code>Document</code> object being built */
87 private Document document;
88
89 /** <code>Element</code> object being built */
90 private Element currentElement;
91
92 /** Indicator of where in the document we are */
93 private boolean atRoot;
94
95 /** Indicator of whether we are in the DocType. Note that the DTD consists
96 * of both the internal subset (inside the <!DOCTYPE> tag) and the
97 * external subset (in a separate .dtd file). */
98 private boolean inDTD = false;
99
100 /** Indicator of whether we are in the internal subset */
101 private boolean inInternalSubset = false;
102
103 /** Indicator of whether we previously were in a CDATA */
104 private boolean previousCDATA = false;
105
106 /** Indicator of whether we are in a CDATA */
107 private boolean inCDATA = false;
108
109 /** Indicator of whether we should expand entities */
110 private boolean expand = true;
111
112 /** Indicator of whether we are actively suppressing (non-expanding) a
113 current entity */
114 private boolean suppress = false;
115
116 /** How many nested entities we're currently within */
117 private int entityDepth = 0; // XXX may not be necessary anymore?
118
119 /** Temporary holder for namespaces that have been declared with
120 * startPrefixMapping, but are not yet available on the element */
121 private List declaredNamespaces;
122
123 /** Temporary holder for the internal subset */
124 private StringBuffer internalSubset = new StringBuffer();
125
126 /** Temporary holder for Text and CDATA */
127 private TextBuffer textBuffer = new TextBuffer();
128
129 /** The external entities defined in this document */
130 private Map externalEntities;
131
132 /** The JDOMFactory used for JDOM object creation */
133 private JDOMFactory factory;
134
135 /** Whether to ignore ignorable whitespace */
136 private boolean ignoringWhite = false;
137
138 /** Whether to ignore text containing all whitespace */
139 private boolean ignoringBoundaryWhite = false;
140
141 /** The SAX Locator object provided by the parser */
142 private Locator locator;
143
144 /**
145 * Class initializer: Populate a table to translate SAX attribute
146 * type names into JDOM attribute type value (integer).
147 * <p>
148 * <b>Note that all the mappings defined below are compliant with
149 * the SAX 2.0 specification exception for "ENUMERATION" with is
150 * specific to Crimson 1.1.X and Xerces 2.0.0-betaX which report
151 * attributes of enumerated types with a type "ENUMERATION"
152 * instead of the expected "NMTOKEN".
153 * </p>
154 * <p>
155 * Note also that Xerces 1.4.X is not SAX 2.0 compliant either
156 * but handling its case requires
157 * {@link #getAttributeType specific code}.
158 * </p>
159 */
160 static {
161 attrNameToTypeMap.put("CDATA",
162 new Integer(Attribute.CDATA_TYPE));
163 attrNameToTypeMap.put("ID",
164 new Integer(Attribute.ID_TYPE));
165 attrNameToTypeMap.put("IDREF",
166 new Integer(Attribute.IDREF_TYPE));
167 attrNameToTypeMap.put("IDREFS",
168 new Integer(Attribute.IDREFS_TYPE));
169 attrNameToTypeMap.put("ENTITY",
170 new Integer(Attribute.ENTITY_TYPE));
171 attrNameToTypeMap.put("ENTITIES",
172 new Integer(Attribute.ENTITIES_TYPE));
173 attrNameToTypeMap.put("NMTOKEN",
174 new Integer(Attribute.NMTOKEN_TYPE));
175 attrNameToTypeMap.put("NMTOKENS",
176 new Integer(Attribute.NMTOKENS_TYPE));
177 attrNameToTypeMap.put("NOTATION",
178 new Integer(Attribute.NOTATION_TYPE));
179 attrNameToTypeMap.put("ENUMERATION",
180 new Integer(Attribute.ENUMERATED_TYPE));
181 }
182
/**
 * Creates a <code>SAXHandler</code> that listens to SAX events and
 * builds a JDOM Document from them. Delegates to
 * {@link #SAXHandler(JDOMFactory)} with a <code>null</code> factory, so
 * the objects are constructed using the default factory.
 */
public SAXHandler() {
    this((JDOMFactory) null);
}
191
/**
 * Creates a <code>SAXHandler</code> that listens to SAX events and
 * builds a JDOM Document from them, constructing objects through the
 * supplied factory.
 *
 * @param factory <code>JDOMFactory</code> to be used for constructing
 *                objects; when <code>null</code>, a
 *                <code>DefaultJDOMFactory</code> is used instead
 */
public SAXHandler(JDOMFactory factory) {
    this.factory = (factory == null) ? new DefaultJDOMFactory() : factory;

    this.declaredNamespaces = new ArrayList();
    this.externalEntities = new HashMap();
    this.atRoot = true;

    // Start out with an empty document (no root element yet).
    this.document = this.factory.document(null);
}
213
214 /**
215 * Pushes an element onto the tree under construction. Allows subclasses
216 * to put content under a dummy root element which is useful for building
217 * content that would otherwise be a non-well formed document.
218 *
219 * @param element root element under which content will be built
220 */
221 protected void pushElement(Element element) {
222 if (atRoot) {
223 document.setRootElement(element); // XXX should we use a factory call?
224 atRoot = false;
225 }
226 else {
227 factory.addContent(currentElement, element);
228 }
229 currentElement = element;
230 }
231
232 /**
233 * Returns the document. Should be called after parsing is complete.
234 *
235 * @return <code>Document</code> - Document that was built
236 */
237 public Document getDocument() {
238 return document;
239 }
240
241 /**
242 * Returns the factory used for constructing objects.
243 *
244 * @return <code>JDOMFactory</code> - the factory used for
245 * constructing objects.
246 *
247 * @see #SAXHandler(org.jdom.JDOMFactory)
248 */
249 public JDOMFactory getFactory() {
250 return factory;
251 }
252
253 /**
254 * This sets whether or not to expand entities during the build.
255 * A true means to expand entities as normal content. A false means to
256 * leave entities unexpanded as <code>EntityRef</code> objects. The
257 * default is true.
258 *
259 * @param expand <code>boolean</code> indicating whether entity expansion
260 * should occur.
261 */
262 public void setExpandEntities(boolean expand) {
263 this.expand = expand;
264 }
265
266 /**
267 * Returns whether or not entities will be expanded during the
268 * build.
269 *
270 * @return <code>boolean</code> - whether entity expansion
271 * will occur during build.
272 *
273 * @see #setExpandEntities
274 */
275 public boolean getExpandEntities() {
276 return expand;
277 }
278
279 /**
280 * Specifies whether or not the parser should elminate whitespace in
281 * element content (sometimes known as "ignorable whitespace") when
282 * building the document. Only whitespace which is contained within
283 * element content that has an element only content model will be
284 * eliminated (see XML Rec 3.2.1). For this setting to take effect
285 * requires that validation be turned on. The default value of this
286 * setting is <code>false</code>.
287 *
288 * @param ignoringWhite Whether to ignore ignorable whitespace
289 */
290 public void setIgnoringElementContentWhitespace(boolean ignoringWhite) {
291 this.ignoringWhite = ignoringWhite;
292 }
293
294 /**
295 * Specifies whether or not the parser should elminate text() nodes
296 * containing only whitespace when building the document. See
297 * {@link SAXBuilder#setIgnoringBoundaryWhitespace(boolean)}.
298 *
299 * @param ignoringBoundaryWhite Whether to ignore only whitespace content
300 */
301 public void setIgnoringBoundaryWhitespace(boolean ignoringBoundaryWhite) {
302 this.ignoringBoundaryWhite = ignoringBoundaryWhite;
303 }
304
305 /**
306 * Returns whether or not the parser will elminate element content
307 * containing only whitespace.
308 *
309 * @return <code>boolean</code> - whether only whitespace content will
310 * be ignored during build.
311 *
312 * @see #setIgnoringBoundaryWhitespace
313 */
314 public boolean getIgnoringBoundaryWhitespace() {
315 return ignoringBoundaryWhite;
316 }
317
318 /**
319 * Returns whether or not the parser will elminate whitespace in
320 * element content (sometimes known as "ignorable whitespace") when
321 * building the document.
322 *
323 * @return <code>boolean</code> - whether ignorable whitespace will
324 * be ignored during build.
325 *
326 * @see #setIgnoringElementContentWhitespace
327 */
328 public boolean getIgnoringElementContentWhitespace() {
329 return ignoringWhite;
330 }
331
332 public void startDocument() {
333 if (locator != null) {
334 document.setBaseURI(locator.getSystemId());
335 }
336 }
337
338 /**
339 * This is called when the parser encounters an external entity
340 * declaration.
341 *
342 * @param name entity name
343 * @param publicID public id
344 * @param systemID system id
345 * @throws SAXException when things go wrong
346 */
347 public void externalEntityDecl(String name,
348 String publicID, String systemID)
349 throws SAXException {
350 // Store the public and system ids for the name
351 externalEntities.put(name, new String[]{publicID, systemID});
352
353 if (!inInternalSubset) return;
354
355 internalSubset.append(" <!ENTITY ")
356 .append(name);
357 appendExternalId(publicID, systemID);
358 internalSubset.append(">\n");
359 }
360
361 /**
362 * This handles an attribute declaration in the internal subset.
363 *
364 * @param eName <code>String</code> element name of attribute
365 * @param aName <code>String</code> attribute name
366 * @param type <code>String</code> attribute type
367 * @param valueDefault <code>String</code> default value of attribute
368 * @param value <code>String</code> value of attribute
369 * @throws SAXException
370 */
371 public void attributeDecl(String eName, String aName, String type,
372 String valueDefault, String value)
373 throws SAXException {
374
375 if (!inInternalSubset) return;
376
377 internalSubset.append(" <!ATTLIST ")
378 .append(eName)
379 .append(' ')
380 .append(aName)
381 .append(' ')
382 .append(type)
383 .append(' ');
384 if (valueDefault != null) {
385 internalSubset.append(valueDefault);
386 } else {
387 internalSubset.append('\"')
388 .append(value)
389 .append('\"');
390 }
391 if ((valueDefault != null) && (valueDefault.equals("#FIXED"))) {
392 internalSubset.append(" \"")
393 .append(value)
394 .append('\"');
395 }
396 internalSubset.append(">\n");
397 }
398
399 /**
400 * Handle an element declaration in a DTD.
401 *
402 * @param name <code>String</code> name of element
403 * @param model <code>String</code> model of the element in DTD syntax
404 * @throws SAXException
405 */
406 public void elementDecl(String name, String model) throws SAXException {
407 // Skip elements that come from the external subset
408 if (!inInternalSubset) return;
409
410 internalSubset.append(" <!ELEMENT ")
411 .append(name)
412 .append(' ')
413 .append(model)
414 .append(">\n");
415 }
416
417 /**
418 * Handle an internal entity declaration in a DTD.
419 *
420 * @param name <code>String</code> name of entity
421 * @param value <code>String</code> value of the entity
422 * @throws SAXException
423 */
424 public void internalEntityDecl(String name, String value)
425 throws SAXException {
426
427 // Skip entities that come from the external subset
428 if (!inInternalSubset) return;
429
430 internalSubset.append(" <!ENTITY ");
431 if (name.startsWith("%")) {
432 internalSubset.append("% ").append(name.substring(1));
433 } else {
434 internalSubset.append(name);
435 }
436 internalSubset.append(" \"")
437 .append(value)
438 .append("\">\n");
439 }
440
441 /**
442 * This will indicate that a processing instruction has been encountered.
443 * (The XML declaration is not a processing instruction and will not
444 * be reported.)
445 *
446 * @param target <code>String</code> target of PI
447 * @param data <code>String</code> containing all data sent to the PI.
448 * This typically looks like one or more attribute value
449 * pairs.
450 * @throws SAXException when things go wrong
451 */
452 public void processingInstruction(String target, String data)
453 throws SAXException {
454
455 if (suppress) return;
456
457 flushCharacters();
458
459 if (atRoot) {
460 factory.addContent(document, factory.processingInstruction(target, data));
461 } else {
462 factory.addContent(getCurrentElement(),
463 factory.processingInstruction(target, data));
464 }
465 }
466
467 /**
468 * This indicates that an unresolvable entity reference has been
469 * encountered, normally because the external DTD subset has not been
470 * read.
471 *
472 * @param name <code>String</code> name of entity
473 * @throws SAXException when things go wrong
474 */
475 public void skippedEntity(String name)
476 throws SAXException {
477
478 // We don't handle parameter entity references.
479 if (name.startsWith("%")) return;
480
481 flushCharacters();
482
483 factory.addContent(getCurrentElement(), factory.entityRef(name));
484 }
485
486 /**
487 * This will add the prefix mapping to the JDOM
488 * <code>Document</code> object.
489 *
490 * @param prefix <code>String</code> namespace prefix.
491 * @param uri <code>String</code> namespace URI.
492 */
493 public void startPrefixMapping(String prefix, String uri)
494 throws SAXException {
495
496 if (suppress) return;
497
498 Namespace ns = Namespace.getNamespace(prefix, uri);
499 declaredNamespaces.add(ns);
500 }
501
502 /**
503 * This reports the occurrence of an actual element. It will include
504 * the element's attributes, with the exception of XML vocabulary
505 * specific attributes, such as
506 * <code>xmlns:[namespace prefix]</code> and
507 * <code>xsi:schemaLocation</code>.
508 *
509 * @param namespaceURI <code>String</code> namespace URI this element
510 * is associated with, or an empty
511 * <code>String</code>
512 * @param localName <code>String</code> name of element (with no
513 * namespace prefix, if one is present)
514 * @param qName <code>String</code> XML 1.0 version of element name:
515 * [namespace prefix]:[localName]
516 * @param atts <code>Attributes</code> list for this element
517 * @throws SAXException when things go wrong
518 */
519 public void startElement(String namespaceURI, String localName,
520 String qName, Attributes atts)
521 throws SAXException {
522 if (suppress) return;
523
524 Element element = null;
525
526 if ((namespaceURI != null) && (!namespaceURI.equals(""))) {
527 String prefix = "";
528
529 // Determine any prefix on the Element
530 if (!qName.equals(localName)) {
531 int split = qName.indexOf(":");
532 prefix = qName.substring(0, split);
533 }
534 Namespace elementNamespace =
535 Namespace.getNamespace(prefix, namespaceURI);
536 element = factory.element(localName, elementNamespace);
537 } else {
538 element = factory.element(localName);
539 }
540
541 // Take leftover declared namespaces and add them to this element's
542 // map of namespaces
543 if (declaredNamespaces.size() > 0) {
544 transferNamespaces(element);
545 }
546
547 // Handle attributes
548 for (int i=0, len=atts.getLength(); i<len; i++) {
549 Attribute attribute = null;
550
551 String attLocalName = atts.getLocalName(i);
552 String attQName = atts.getQName(i);
553 int attType = getAttributeType(atts.getType(i));
554
555 // Bypass any xmlns attributes which might appear, as we got
556 // them already in startPrefixMapping().
557 // This is sometimes necessary when SAXHandler is used with
558 // another source than SAXBuilder, as with JDOMResult.
559 if (attQName.startsWith("xmlns:") || attQName.equals("xmlns")) {
560 continue;
561 }
562
563 // First clause per http://markmail.org/message/2p245ggcjst27xe6
564 // patch from Mattias Jiderhamn
565 if ("".equals(attLocalName) && attQName.indexOf(":") == -1) {
566 attribute = factory.attribute(attQName, atts.getValue(i), attType);
567 } else if (!attQName.equals(attLocalName)) {
568 String attPrefix = attQName.substring(0, attQName.indexOf(":"));
569 Namespace attNs = Namespace.getNamespace(attPrefix,
570 atts.getURI(i));
571
572 attribute = factory.attribute(attLocalName, atts.getValue(i),
573 attType, attNs);
574 } else {
575 attribute = factory.attribute(attLocalName, atts.getValue(i),
576 attType);
577 }
578 factory.setAttribute(element, attribute);
579 }
580
581 flushCharacters();
582
583 if (atRoot) {
584 document.setRootElement(element); // XXX should we use a factory call?
585 atRoot = false;
586 } else {
587 factory.addContent(getCurrentElement(), element);
588 }
589 currentElement = element;
590 }
591
592 /**
593 * This will take the supplied <code>{@link Element}</code> and
594 * transfer its namespaces to the global namespace storage.
595 *
596 * @param element <code>Element</code> to read namespaces from.
597 */
598 private void transferNamespaces(Element element) {
599 Iterator i = declaredNamespaces.iterator();
600 while (i.hasNext()) {
601 Namespace ns = (Namespace)i.next();
602 if (ns != element.getNamespace()) {
603 element.addNamespaceDeclaration(ns);
604 }
605 }
606 declaredNamespaces.clear();
607 }
608
609 /**
610 * This will report character data (within an element).
611 *
612 * @param ch <code>char[]</code> character array with character data
613 * @param start <code>int</code> index in array where data starts.
614 * @param length <code>int</code> length of data.
615 * @throws SAXException
616 */
617 public void characters(char[] ch, int start, int length)
618 throws SAXException {
619
620 if (suppress || (length == 0))
621 return;
622
623 if (previousCDATA != inCDATA) {
624 flushCharacters();
625 }
626
627 textBuffer.append(ch, start, length);
628 }
629
630 /**
631 * Capture ignorable whitespace as text. If
632 * setIgnoringElementContentWhitespace(true) has been called then this
633 * method does nothing.
634 *
635 * @param ch <code>[]</code> - char array of ignorable whitespace
636 * @param start <code>int</code> - starting position within array
637 * @param length <code>int</code> - length of whitespace after start
638 * @throws SAXException when things go wrong
639 */
640 public void ignorableWhitespace(char[] ch, int start, int length)
641 throws SAXException {
642 if (!ignoringWhite) {
643 characters(ch, start, length);
644 }
645 }
646
647 /**
648 * This will flush any characters from SAX character calls we've
649 * been buffering.
650 *
651 * @throws SAXException when things go wrong
652 */
653 protected void flushCharacters() throws SAXException {
654 if (ignoringBoundaryWhite) {
655 if (!textBuffer.isAllWhitespace()) {
656 flushCharacters(textBuffer.toString());
657 }
658 }
659 else {
660 flushCharacters(textBuffer.toString());
661 }
662 textBuffer.clear();
663 }
664
665 /**
666 * Flush the given string into the document. This is a protected method
667 * so subclassers can control text handling without knowledge of the
668 * internals of this class.
669 *
670 * @param data string to flush
671 */
672 protected void flushCharacters(String data) throws SAXException {
673 if (data.length() == 0) {
674 previousCDATA = inCDATA;
675 return;
676 }
677
678 /**
679 * This is commented out because of some problems with
680 * the inline DTDs that Xerces seems to have.
681 if (!inDTD) {
682 if (inEntity) {
683 getCurrentElement().setContent(factory.text(data));
684 } else {
685 getCurrentElement().addContent(factory.text(data));
686 }
687 */
688
689 if (previousCDATA) {
690 factory.addContent(getCurrentElement(), factory.cdata(data));
691 }
692 else {
693 factory.addContent(getCurrentElement(), factory.text(data));
694 }
695
696 previousCDATA = inCDATA;
697 }
698
699 /**
700 * Indicates the end of an element
701 * (<code>&lt;/[element name]&gt;</code>) is reached. Note that
702 * the parser does not distinguish between empty
703 * elements and non-empty elements, so this will occur uniformly.
704 *
705 * @param namespaceURI <code>String</code> URI of namespace this
706 * element is associated with
707 * @param localName <code>String</code> name of element without prefix
708 * @param qName <code>String</code> name of element in XML 1.0 form
709 * @throws SAXException when things go wrong
710 */
711 public void endElement(String namespaceURI, String localName,
712 String qName) throws SAXException {
713
714 if (suppress) return;
715
716 flushCharacters();
717
718 if (!atRoot) {
719 Parent p = currentElement.getParent();
720 if (p instanceof Document) {
721 atRoot = true;
722 }
723 else {
724 currentElement = (Element) p;
725 }
726 }
727 else {
728 throw new SAXException(
729 "Ill-formed XML document (missing opening tag for " +
730 localName + ")");
731 }
732 }
733
734 /**
735 * This will signify that a DTD is being parsed, and can be
736 * used to ensure that comments and other lexical structures
737 * in the DTD are not added to the JDOM <code>Document</code>
738 * object.
739 *
740 * @param name <code>String</code> name of element listed in DTD
741 * @param publicID <code>String</code> public ID of DTD
742 * @param systemID <code>String</code> system ID of DTD
743 */
744 public void startDTD(String name, String publicID, String systemID)
745 throws SAXException {
746
747 flushCharacters(); // Is this needed here?
748
749 factory.addContent(document, factory.docType(name, publicID, systemID));
750 inDTD = true;
751 inInternalSubset = true;
752 }
753
754 /**
755 * This signifies that the reading of the DTD is complete.
756 *
757 * @throws SAXException
758 */
759 public void endDTD() throws SAXException {
760
761 document.getDocType().setInternalSubset(internalSubset.toString());
762 inDTD = false;
763 inInternalSubset = false;
764 }
765
766 public void startEntity(String name) throws SAXException {
767 entityDepth++;
768
769 if (expand || entityDepth > 1) {
770 // Short cut out if we're expanding or if we're nested
771 return;
772 }
773
774 // A "[dtd]" entity indicates the beginning of the external subset
775 if (name.equals("[dtd]")) {
776 inInternalSubset = false;
777 return;
778 }
779
780 // Ignore DTD references, and translate the standard 5
781 if ((!inDTD) &&
782 (!name.equals("amp")) &&
783 (!name.equals("lt")) &&
784 (!name.equals("gt")) &&
785 (!name.equals("apos")) &&
786 (!name.equals("quot"))) {
787
788 if (!expand) {
789 String pub = null;
790 String sys = null;
791 String[] ids = (String[]) externalEntities.get(name);
792 if (ids != null) {
793 pub = ids[0]; // may be null, that's OK
794 sys = ids[1]; // may be null, that's OK
795 }
796 /**
797 * if no current element, this entity belongs to an attribute
798 * in these cases, it is an error on the part of the parser
799 * to call startEntity but this will help in some cases.
800 * See org/xml/sax/ext/LexicalHandler.html#startEntity(java.lang.String)
801 * for more information
802 */
803 if (!atRoot) {
804 flushCharacters();
805 EntityRef entity = factory.entityRef(name, pub, sys);
806
807 // no way to tell if the entity was from an attribute or element so just assume element
808 factory.addContent(getCurrentElement(), entity);
809 }
810 suppress = true;
811 }
812 }
813 }
814
815 public void endEntity(String name) throws SAXException {
816 entityDepth--;
817 if (entityDepth == 0) {
818 // No way are we suppressing if not in an entity,
819 // regardless of the "expand" value
820 suppress = false;
821 }
822 if (name.equals("[dtd]")) {
823 inInternalSubset = true;
824 }
825 }
826
827 /**
828 * Report a CDATA section
829 *
830 * @throws SAXException
831 */
832 public void startCDATA() throws SAXException {
833 if (suppress) return;
834
835 inCDATA = true;
836 }
837
838 /**
839 * Report a CDATA section
840 */
841 public void endCDATA() throws SAXException {
842 if (suppress) return;
843
844 previousCDATA = true;
845 inCDATA = false;
846 }
847
848 /**
849 * This reports that a comments is parsed. If not in the
850 * DTD, this comment is added to the current JDOM
851 * <code>Element</code>, or the <code>Document</code> itself
852 * if at that level.
853 *
854 * @param ch <code>ch[]</code> array of comment characters.
855 * @param start <code>int</code> index to start reading from.
856 * @param length <code>int</code> length of data.
857 * @throws SAXException
858 */
859 public void comment(char[] ch, int start, int length)
860 throws SAXException {
861
862 if (suppress) return;
863
864 flushCharacters();
865
866 String commentText = new String(ch, start, length);
867 if (inDTD && inInternalSubset && (expand == false)) {
868 internalSubset.append(" <!--")
869 .append(commentText)
870 .append("-->\n");
871 return;
872 }
873 if ((!inDTD) && (!commentText.equals(""))) {
874 if (atRoot) {
875 factory.addContent(document, factory.comment(commentText));
876 } else {
877 factory.addContent(getCurrentElement(), factory.comment(commentText));
878 }
879 }
880 }
881
882 /**
883 * Handle the declaration of a Notation in a DTD
884 *
885 * @param name name of the notation
886 * @param publicID the public ID of the notation
887 * @param systemID the system ID of the notation
888 */
889 public void notationDecl(String name, String publicID, String systemID)
890 throws SAXException {
891
892 if (!inInternalSubset) return;
893
894 internalSubset.append(" <!NOTATION ")
895 .append(name);
896 appendExternalId(publicID, systemID);
897 internalSubset.append(">\n");
898 }
899
900 /**
901 * Handler for unparsed entity declarations in the DTD
902 *
903 * @param name <code>String</code> of the unparsed entity decl
904 * @param publicID <code>String</code> of the unparsed entity decl
905 * @param systemID <code>String</code> of the unparsed entity decl
906 * @param notationName <code>String</code> of the unparsed entity decl
907 */
908 public void unparsedEntityDecl(String name, String publicID,
909 String systemID, String notationName)
910 throws SAXException {
911
912 if (!inInternalSubset) return;
913
914 internalSubset.append(" <!ENTITY ")
915 .append(name);
916 appendExternalId(publicID, systemID);
917 internalSubset.append(" NDATA ")
918 .append(notationName);
919 internalSubset.append(">\n");
920 }
921
922 /**
923 * Appends an external ID to the internal subset buffer. Either publicID
924 * or systemID may be null, but not both.
925 *
926 * @param publicID the public ID
927 * @param systemID the system ID
928 */
929 private void appendExternalId(String publicID, String systemID) {
930 if (publicID != null) {
931 internalSubset.append(" PUBLIC \"")
932 .append(publicID)
933 .append('\"');
934 }
935 if (systemID != null) {
936 if (publicID == null) {
937 internalSubset.append(" SYSTEM ");
938 }
939 else {
940 internalSubset.append(' ');
941 }
942 internalSubset.append('\"')
943 .append(systemID)
944 .append('\"');
945 }
946 }
947
948 /**
949 * Returns the being-parsed element.
950 *
951 * @return <code>Element</code> - element being built.
952 * @throws SAXException
953 */
954 public Element getCurrentElement() throws SAXException {
955 if (currentElement == null) {
956 throw new SAXException(
957 "Ill-formed XML document (multiple root elements detected)");
958 }
959 return currentElement;
960 }
961
962 /**
963 * Returns the the JDOM Attribute type value from the SAX 2.0
964 * attribute type string provided by the parser.
965 *
966 * @param typeName <code>String</code> the SAX 2.0 attribute
967 * type string.
968 *
969 * @return <code>int</code> the JDOM attribute type.
970 *
971 * @see Attribute#setAttributeType
972 * @see Attributes#getType
973 */
974 private static int getAttributeType(String typeName) {
975 Integer type = (Integer)(attrNameToTypeMap.get(typeName));
976 if (type == null) {
977 if (typeName != null && typeName.length() > 0 &&
978 typeName.charAt(0) == '(') {
979 // Xerces 1.4.X reports attributes of enumerated type with
980 // a type string equals to the enumeration definition, i.e.
981 // starting with a parenthesis.
982 return Attribute.ENUMERATED_TYPE;
983 }
984 else {
985 return Attribute.UNDECLARED_TYPE;
986 }
987 } else {
988 return type.intValue();
989 }
990 }
991
992 /**
993 * Receives an object for locating the origin of SAX document
994 * events. This method is invoked by the SAX parser.
995 * <p>
996 * {@link org.jdom.JDOMFactory} implementations can use the
997 * {@link #getDocumentLocator} method to get access to the
998 * {@link Locator} during parse.
999 * </p>
1000 *
1001 * @param locator <code>Locator</code> an object that can return
1002 * the location of any SAX document event.
1003 */
1004 public void setDocumentLocator(Locator locator) {
1005 this.locator = locator;
1006 }
1007
1008 /**
1009 * Provides access to the {@link Locator} object provided by the
1010 * SAX parser.
1011 *
1012 * @return <code>Locator</code> an object that can return
1013 * the location of any SAX document event.
1014 */
1015 public Locator getDocumentLocator() {
1016 return locator;
1017 }
1018 }