comparison NGSrich_0.5.5/src/org/jdom/input/SAXBuilder.java @ 0:89ad0a9cca52 default tip

Uploaded
author pfrommolt
date Mon, 21 Nov 2011 08:12:19 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:89ad0a9cca52
1 /*--
2
3 $Id: SAXBuilder.java,v 1.93 2009/07/23 06:26:26 jhunter Exp $
4
5 Copyright (C) 2000-2007 Jason Hunter & Brett McLaughlin.
6 All rights reserved.
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions
10 are met:
11
12 1. Redistributions of source code must retain the above copyright
13 notice, this list of conditions, and the following disclaimer.
14
15 2. Redistributions in binary form must reproduce the above copyright
16 notice, this list of conditions, and the disclaimer that follows
17 these conditions in the documentation and/or other materials
18 provided with the distribution.
19
20 3. The name "JDOM" must not be used to endorse or promote products
21 derived from this software without prior written permission. For
22 written permission, please contact <request_AT_jdom_DOT_org>.
23
24 4. Products derived from this software may not be called "JDOM", nor
25 may "JDOM" appear in their name, without prior written permission
26 from the JDOM Project Management <request_AT_jdom_DOT_org>.
27
28 In addition, we request (but do not require) that you include in the
29 end-user documentation provided with the redistribution and/or in the
30 software itself an acknowledgement equivalent to the following:
31 "This product includes software developed by the
32 JDOM Project (http://www.jdom.org/)."
33 Alternatively, the acknowledgment may be graphical using the logos
34 available at http://www.jdom.org/images/logos.
35
36 THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39 DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
40 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47 SUCH DAMAGE.
48
49 This software consists of voluntary contributions made by many
50 individuals on behalf of the JDOM Project and was originally
51 created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
52 Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information
53 on the JDOM Project, please see <http://www.jdom.org/>.
54
55 */
56
57 package org.jdom.input;
58
59 import java.io.*;
60 import java.lang.reflect.*;
61 import java.net.*;
62 import java.util.*;
63
64 import org.jdom.*;
65
66 import org.xml.sax.*;
67 import org.xml.sax.helpers.XMLReaderFactory;
68
69 /**
70 * Builds a JDOM document from files, streams, readers, URLs, or a SAX {@link
71 * org.xml.sax.InputSource} instance using a SAX parser. The builder uses a
72 * third-party SAX parser (chosen by JAXP by default, or you can choose
73 * manually) to handle the parsing duties and simply listens to the SAX events
74 * to construct a document. Details which SAX does not provide, such as
75 * whitespace outside the root element, are not represented in the JDOM
76 * document. Information about SAX can be found at <a
77 * href="http://www.saxproject.org">http://www.saxproject.org</a>.
78 * <p>
79 * Known issues: Relative paths for a {@link DocType} or {@link EntityRef} may
80 * be converted by the SAX parser into absolute paths.
81 *
82 * @version $Revision: 1.93 $, $Date: 2009/07/23 06:26:26 $
83 * @author Jason Hunter
84 * @author Brett McLaughlin
85 * @author Dan Schaffer
86 * @author Philip Nelson
87 * @author Alex Rosen
88 */
89 public class SAXBuilder {
90
91 private static final String CVS_ID =
92 "@(#) $RCSfile: SAXBuilder.java,v $ $Revision: 1.93 $ $Date: 2009/07/23 06:26:26 $ $Name: jdom_1_1_1 $";
93
94 /**
95 * Default parser class to use. This is used when no other parser
96 * is given and JAXP isn't available.
97 */
98 private static final String DEFAULT_SAX_DRIVER =
99 "org.apache.xerces.parsers.SAXParser";
100
101 /** Whether validation should occur */
102 private boolean validate;
103
104 /** Whether expansion of entities should occur */
105 private boolean expand = true;
106
107 /** Adapter class to use */
108 private String saxDriverClass;
109
110 /** ErrorHandler class to use */
111 private ErrorHandler saxErrorHandler = null;
112
113 /** EntityResolver class to use */
114 private EntityResolver saxEntityResolver = null;
115
116 /** DTDHandler class to use */
117 private DTDHandler saxDTDHandler = null;
118
119 /** XMLFilter instance to use */
120 private XMLFilter saxXMLFilter = null;
121
122 /** The factory for creating new JDOM objects */
123 private JDOMFactory factory = new DefaultJDOMFactory();
124
125 /** Whether to ignore ignorable whitespace */
126 private boolean ignoringWhite = false;
127
128 /** Whether to ignore all whitespace content */
129 private boolean ignoringBoundaryWhite = false;
130
131 /** User-specified features to be set on the SAX parser */
132 private HashMap features = new HashMap(5);
133
134 /** User-specified properties to be set on the SAX parser */
135 private HashMap properties = new HashMap(5);
136
137 /** Whether to use fast parser reconfiguration */
138 private boolean fastReconfigure = false;
139
140 /** Whether to try lexical reporting in fast parser reconfiguration */
141 private boolean skipNextLexicalReportingConfig = false;
142
143 /** Whether to to try entity expansion in fast parser reconfiguration */
144 private boolean skipNextEntityExpandConfig = false;
145
146 /**
147 * Whether parser reuse is allowed.
148 * <p>Default: <code>true</code></p>
149 */
150 private boolean reuseParser = true;
151
152 /** The current SAX parser, if parser reuse has been activated. */
153 private XMLReader saxParser = null;
154
/**
 * Creates a non-validating SAXBuilder. No SAX driver class is chosen,
 * so the parser is located via JAXP first and the built-in default
 * driver is used as a fallback.
 */
public SAXBuilder() {
    // Delegate to the validation-aware constructor with validation off.
    this(false);
}
163
/**
 * Creates a SAXBuilder without an explicit SAX driver class; the parser
 * is located via JAXP first, then the built-in default driver is tried.
 *
 * @param validate <code>true</code> if the underlying parser should
 *                 validate, <code>false</code> otherwise.
 */
public SAXBuilder(boolean validate) {
    // The driver class stays null so that parser lookup is automatic.
    this.validate = validate;
}
176
/**
 * Creates a non-validating SAXBuilder that uses the named SAX driver.
 *
 * @param saxDriverClass <code>String</code> name of the SAX driver
 *                       class to use for parsing.
 */
public SAXBuilder(String saxDriverClass) {
    // Delegate to the two-argument constructor with validation off.
    this(saxDriverClass, false);
}
187
/**
 * Creates a SAXBuilder that uses the named SAX driver and validates
 * or not according to the given flag.
 *
 * @param saxDriverClass <code>String</code> name of the SAX driver
 *                       class to use for parsing.
 * @param validate <code>true</code> if the underlying parser should
 *                 validate, <code>false</code> otherwise.
 */
public SAXBuilder(String saxDriverClass, boolean validate) {
    this.validate = validate;
    this.saxDriverClass = saxDriverClass;
}
202
203 /**
204 * Returns the driver class assigned in the constructor, or null if none.
205 *
206 * @return the driver class assigned in the constructor
207 */
208 public String getDriverClass() {
209 return saxDriverClass;
210 }
211
212 /**
213 * Returns the current {@link org.jdom.JDOMFactory} in use.
214 * @return the factory in use
215 */
216 public JDOMFactory getFactory() {
217 return factory;
218 }
219
220 /**
221 * This sets a custom JDOMFactory for the builder. Use this to build
222 * the tree with your own subclasses of the JDOM classes.
223 *
224 * @param factory <code>JDOMFactory</code> to use
225 */
226 public void setFactory(JDOMFactory factory) {
227 this.factory = factory;
228 }
229
230 /**
231 * Returns whether validation is to be performed during the build.
232 *
233 * @return whether validation is to be performed during the build
234 */
235 public boolean getValidation() {
236 return validate;
237 }
238
239 /**
240 * This sets validation for the builder.
241 *
242 * @param validate <code>boolean</code> indicating whether validation
243 * should occur.
244 */
245 public void setValidation(boolean validate) {
246 this.validate = validate;
247 }
248
249 /**
250 * Returns the {@link ErrorHandler} assigned, or null if none.
251 * @return the ErrorHandler assigned, or null if none
252 */
253 public ErrorHandler getErrorHandler() {
254 return saxErrorHandler;
255 }
256
257 /**
258 * This sets custom ErrorHandler for the <code>Builder</code>.
259 *
260 * @param errorHandler <code>ErrorHandler</code>
261 */
262 public void setErrorHandler(ErrorHandler errorHandler) {
263 saxErrorHandler = errorHandler;
264 }
265
266 /**
267 * Returns the {@link EntityResolver} assigned, or null if none.
268 *
269 * @return the EntityResolver assigned
270 */
271 public EntityResolver getEntityResolver() {
272 return saxEntityResolver;
273 }
274
275 /**
276 * This sets custom EntityResolver for the <code>Builder</code>.
277 *
278 * @param entityResolver <code>EntityResolver</code>
279 */
280 public void setEntityResolver(EntityResolver entityResolver) {
281 saxEntityResolver = entityResolver;
282 }
283
284 /**
285 * Returns the {@link DTDHandler} assigned, or null if none.
286 *
287 * @return the DTDHandler assigned
288 */
289 public DTDHandler getDTDHandler() {
290 return saxDTDHandler;
291 }
292
293 /**
294 * This sets custom DTDHandler for the <code>Builder</code>.
295 *
296 * @param dtdHandler <code>DTDHandler</code>
297 */
298 public void setDTDHandler(DTDHandler dtdHandler) {
299 saxDTDHandler = dtdHandler;
300 }
301
302 /**
303 * Returns the {@link XMLFilter} used during parsing, or null if none.
304 *
305 * @return the XMLFilter used during parsing
306 */
307 public XMLFilter getXMLFilter() {
308 return saxXMLFilter;
309 }
310
311 /**
312 * This sets a custom {@link org.xml.sax.XMLFilter} for the builder.
313 *
314 * @param xmlFilter the filter to use
315 */
316 public void setXMLFilter(XMLFilter xmlFilter) {
317 saxXMLFilter = xmlFilter;
318 }
319
320 /**
321 * Returns whether element content whitespace is to be ignored during the
322 * build.
323 *
324 * @return whether element content whitespace is to be ignored during the
325 * build
326 */
327 public boolean getIgnoringElementContentWhitespace() {
328 return ignoringWhite;
329 }
330
331 /**
332 * Specifies whether or not the parser should elminate whitespace in
333 * element content (sometimes known as "ignorable whitespace") when
334 * building the document. Only whitespace which is contained within
335 * element content that has an element only content model will be
336 * eliminated (see XML Rec 3.2.1). For this setting to take effect
337 * requires that validation be turned on. The default value of this
338 * setting is <code>false</code>.
339 *
340 * @param ignoringWhite Whether to ignore ignorable whitespace
341 */
342 public void setIgnoringElementContentWhitespace(boolean ignoringWhite) {
343 this.ignoringWhite = ignoringWhite;
344 }
345
346 /**
347 * Returns whether or not the parser will elminate element content
348 * containing only whitespace.
349 *
350 * @return <code>boolean</code> - whether only whitespace content will
351 * be ignored during build.
352 *
353 * @see #setIgnoringBoundaryWhitespace
354 */
355 public boolean getIgnoringBoundaryWhitespace() {
356 return ignoringBoundaryWhite;
357 }
358
359 /**
360 * Specifies whether or not the parser should elminate boundary whitespace,
361 * a term that indicates whitespace-only text between element tags. This
362 * feature is a lot like {@link #setIgnoringElementContentWhitespace(boolean)}
363 * but this feature is more aggressive and doesn't require validation be
364 * turned on. The {@link #setIgnoringElementContentWhitespace(boolean)}
365 * call impacts the SAX parse process while this method impacts the JDOM
366 * build process, so it can be beneficial to turn both on for efficiency.
367 * For implementation efficiency, this method actually removes all
368 * whitespace-only text() nodes. That can, in some cases (like beteween an
369 * element tag and a comment), include whitespace that isn't just boundary
370 * whitespace. The default is <code>false</code>.
371 *
372 * @param ignoringBoundaryWhite Whether to ignore whitespace-only text
373 * noes
374 */
375 public void setIgnoringBoundaryWhitespace(boolean ignoringBoundaryWhite) {
376 this.ignoringBoundaryWhite = ignoringBoundaryWhite;
377 }
378
379 /**
380 * Returns whether the contained SAX parser instance is reused across
381 * multiple parses. The default is true.
382 *
383 * @return whether the contained SAX parser instance is reused across
384 * multiple parses
385 */
386 public boolean getReuseParser() {
387 return reuseParser;
388 }
389
390 /**
391 * Specifies whether this builder shall reuse the same SAX parser
392 * when performing subsequent parses or allocate a new parser for
393 * each parse. The default value of this setting is
394 * <code>true</code> (parser reuse).
395 * <p>
396 * <strong>Note</strong>: As SAX parser instances are not thread safe,
397 * the parser reuse feature should not be used with SAXBuilder instances
398 * shared among threads.</p>
399 *
400 * @param reuseParser Whether to reuse the SAX parser.
401 */
402 public void setReuseParser(boolean reuseParser) {
403 this.reuseParser = reuseParser;
404 this.saxParser = null;
405 }
406
407 /**
408 * Specifies whether this builder will do fast reconfiguration of the
409 * underlying SAX parser when reuseParser is true. This improves
410 * performance in cases where SAXBuilders are reused and lots of small
411 * documents are frequently parsed. This avoids attempting to set features
412 * on the SAX parser each time build() is called which result in
413 * SaxNotRecognizedExceptions. This should ONLY be set for builders where
414 * this specific case is an issue. The default value of this setting is
415 * <code>false</code> (no fast reconfiguration). If reuseParser is false,
416 * calling this has no effect.
417 *
418 * @param fastReconfigure Whether to do a fast reconfiguration of the parser
419 */
420 public void setFastReconfigure(boolean fastReconfigure) {
421 if (this.reuseParser) {
422 this.fastReconfigure = fastReconfigure;
423 }
424 }
425
426 /**
427 * This sets a feature on the SAX parser. See the SAX documentation for .
428 * more information.
429 * </p>
430 * <p>
431 * NOTE: SAXBuilder requires that some particular features of the SAX parser be
432 * set up in certain ways for it to work properly. The list of such features
433 * may change in the future. Therefore, the use of this method may cause
434 * parsing to break, and even if it doesn't break anything today it might
435 * break parsing in a future JDOM version, because what JDOM parsers require
436 * may change over time. Use with caution.
437 * </p>
438 *
439 * @param name The feature name, which is a fully-qualified URI.
440 * @param value The requested state of the feature (true or false).
441 */
442 public void setFeature(String name, boolean value) {
443 // Save the specified feature for later.
444 features.put(name, value ? Boolean.TRUE : Boolean.FALSE);
445 }
446
447 /**
448 * This sets a property on the SAX parser. See the SAX documentation for
449 * more information.
450 * <p>
451 * NOTE: SAXBuilder requires that some particular properties of the SAX parser be
452 * set up in certain ways for it to work properly. The list of such properties
453 * may change in the future. Therefore, the use of this method may cause
454 * parsing to break, and even if it doesn't break anything today it might
455 * break parsing in a future JDOM version, because what JDOM parsers require
456 * may change over time. Use with caution.
457 * </p>
458 *
459 * @param name The property name, which is a fully-qualified URI.
460 * @param value The requested value for the property.
461 */
462 public void setProperty(String name, Object value) {
463 // Save the specified property for later.
464 properties.put(name, value);
465 }
466
467 /**
468 * This builds a document from the supplied
469 * input source.
470 *
471 * @param in <code>InputSource</code> to read from
472 * @return <code>Document</code> resultant Document object
473 * @throws JDOMException when errors occur in parsing
474 * @throws IOException when an I/O error prevents a document
475 * from being fully parsed
476 */
477 public Document build(InputSource in)
478 throws JDOMException, IOException {
479 SAXHandler contentHandler = null;
480
481 try {
482 // Create and configure the content handler.
483 contentHandler = createContentHandler();
484 configureContentHandler(contentHandler);
485
486 XMLReader parser = this.saxParser;
487 if (parser == null) {
488 // Create and configure the parser.
489 parser = createParser();
490
491 // Install optional filter
492 if (saxXMLFilter != null) {
493 // Connect filter chain to parser
494 XMLFilter root = saxXMLFilter;
495 while (root.getParent() instanceof XMLFilter) {
496 root = (XMLFilter)root.getParent();
497 }
498 root.setParent(parser);
499
500 // Read from filter
501 parser = saxXMLFilter;
502 }
503
504 // Configure parser
505 configureParser(parser, contentHandler);
506
507 if (reuseParser) {
508 this.saxParser = parser;
509 }
510 }
511 else {
512 // Reset content handler as SAXHandler instances cannot
513 // be reused
514 configureParser(parser, contentHandler);
515 }
516
517 // Parse the document.
518 parser.parse(in);
519
520 return contentHandler.getDocument();
521 }
522 catch (SAXParseException e) {
523 Document doc = contentHandler.getDocument();
524 if (doc.hasRootElement() == false) {
525 doc = null;
526 }
527
528 String systemId = e.getSystemId();
529 if (systemId != null) {
530 throw new JDOMParseException("Error on line " +
531 e.getLineNumber() + " of document " + systemId, e, doc);
532 } else {
533 throw new JDOMParseException("Error on line " +
534 e.getLineNumber(), e, doc);
535 }
536 }
537 catch (SAXException e) {
538 throw new JDOMParseException("Error in building: " +
539 e.getMessage(), e, contentHandler.getDocument());
540 }
541 finally {
542 // Explicitly nullify the handler to encourage GC
543 // It's a stack var so this shouldn't be necessary, but it
544 // seems to help on some JVMs
545 contentHandler = null;
546 }
547 }
548
549 /**
550 * This creates the SAXHandler that will be used to build the Document.
551 *
552 * @return <code>SAXHandler</code> - resultant SAXHandler object.
553 */
554 protected SAXHandler createContentHandler() {
555 SAXHandler contentHandler = new SAXHandler(factory);
556 return contentHandler;
557 }
558
559 /**
560 * This configures the SAXHandler that will be used to build the Document.
561 * <p>
562 * The default implementation simply passes through some configuration
563 * settings that were set on the SAXBuilder: setExpandEntities() and
564 * setIgnoringElementContentWhitespace().
565 * </p>
566 * @param contentHandler The SAXHandler to configure
567 */
568 protected void configureContentHandler(SAXHandler contentHandler) {
569 // Setup pass through behavior
570 contentHandler.setExpandEntities(expand);
571 contentHandler.setIgnoringElementContentWhitespace(ignoringWhite);
572 contentHandler.setIgnoringBoundaryWhitespace(ignoringBoundaryWhite);
573 }
574
575 /**
576 * This creates the XMLReader to be used for reading the XML document.
577 * <p>
578 * The default behavior is to (1) use the saxDriverClass, if it has been
579 * set, (2) try to obtain a parser from JAXP, if it is available, and
580 * (3) if all else fails, use a hard-coded default parser (currently
581 * the Xerces parser). Subclasses may override this method to determine
582 * the parser to use in a different way.
583 * </p>
584 *
585 * @return <code>XMLReader</code> - resultant XMLReader object.
586 * @throws org.jdom.JDOMException
587 */
588 protected XMLReader createParser() throws JDOMException {
589 XMLReader parser = null;
590 if (saxDriverClass != null) {
591 // The user knows that they want to use a particular class
592 try {
593 parser = XMLReaderFactory.createXMLReader(saxDriverClass);
594
595 // Configure parser
596 setFeaturesAndProperties(parser, true);
597 }
598 catch (SAXException e) {
599 throw new JDOMException("Could not load " + saxDriverClass, e);
600 }
601 } else {
602 // Try using JAXP...
603 // Note we need JAXP 1.1, and if JAXP 1.0 is all that's
604 // available then the getXMLReader call fails and we skip
605 // to the hard coded default parser
606 try {
607 // Get factory class and method.
608 Class factoryClass =
609 Class.forName("org.jdom.input.JAXPParserFactory");
610
611 Method createParser =
612 factoryClass.getMethod("createParser",
613 new Class[] { boolean.class, Map.class, Map.class });
614
615 // Create SAX parser.
616 parser = (XMLReader)createParser.invoke(null,
617 new Object[] { validate ? Boolean.TRUE : Boolean.FALSE,
618 features, properties });
619
620 // Configure parser
621 setFeaturesAndProperties(parser, false);
622 }
623 catch (JDOMException e) {
624 throw e;
625 }
626 catch (NoClassDefFoundError e) {
627 // The class loader failed to resolve the dependencies
628 // of org.jdom.input.JAXPParserFactory. This probably means
629 // that no JAXP parser is present in its class path.
630 // => Ignore and try allocating default SAX parser instance.
631 }
632 catch (Exception e) {
633 // Ignore and try allocating default SAX parser instance.
634 }
635 }
636
637 // Check to see if we got a parser yet, if not, try to use a
638 // hard coded default
639 if (parser == null) {
640 try {
641 parser = XMLReaderFactory.createXMLReader(DEFAULT_SAX_DRIVER);
642 // System.out.println("using default " + DEFAULT_SAX_DRIVER);
643 saxDriverClass = parser.getClass().getName();
644
645 // Configure parser
646 setFeaturesAndProperties(parser, true);
647 }
648 catch (SAXException e) {
649 throw new JDOMException("Could not load default SAX parser: "
650 + DEFAULT_SAX_DRIVER, e);
651 }
652 }
653
654 return parser;
655 }
656
657 /**
658 * This configures the XMLReader to be used for reading the XML document.
659 * <p>
660 * The default implementation sets various options on the given XMLReader,
661 * such as validation, DTD resolution, entity handlers, etc., according
662 * to the options that were set (e.g. via <code>setEntityResolver</code>)
663 * and set various SAX properties and features that are required for JDOM
664 * internals. These features may change in future releases, so change this
665 * behavior at your own risk.
666 * </p>
667 * @param parser
668 * @param contentHandler
669 * @throws org.jdom.JDOMException
670 */
671 protected void configureParser(XMLReader parser, SAXHandler contentHandler)
672 throws JDOMException {
673
674 // Setup SAX handlers.
675
676 parser.setContentHandler(contentHandler);
677
678 if (saxEntityResolver != null) {
679 parser.setEntityResolver(saxEntityResolver);
680 }
681
682 if (saxDTDHandler != null) {
683 parser.setDTDHandler(saxDTDHandler);
684 } else {
685 parser.setDTDHandler(contentHandler);
686 }
687
688 if (saxErrorHandler != null) {
689 parser.setErrorHandler(saxErrorHandler);
690 } else {
691 parser.setErrorHandler(new BuilderErrorHandler());
692 }
693
694 // If fastReconfigure is enabled and we failed in the previous attempt
695 // in configuring lexical reporting, then we skip this step. This
696 // saves the work of repeated exception handling on each parse.
697 if (!skipNextLexicalReportingConfig) {
698 boolean success = false;
699
700 try {
701 parser.setProperty("http://xml.org/sax/handlers/LexicalHandler",
702 contentHandler);
703 success = true;
704 } catch (SAXNotSupportedException e) {
705 // No lexical reporting available
706 } catch (SAXNotRecognizedException e) {
707 // No lexical reporting available
708 }
709
710 // Some parsers use alternate property for lexical handling (grr...)
711 if (!success) {
712 try {
713 parser.setProperty("http://xml.org/sax/properties/lexical-handler",
714 contentHandler);
715 success = true;
716 } catch (SAXNotSupportedException e) {
717 // No lexical reporting available
718 } catch (SAXNotRecognizedException e) {
719 // No lexical reporting available
720 }
721 }
722
723 // If unable to configure this property and fastReconfigure is
724 // enabled, then setup to avoid this code path entirely next time.
725 if (!success && fastReconfigure) {
726 skipNextLexicalReportingConfig = true;
727 }
728 }
729
730 // If fastReconfigure is enabled and we failed in the previous attempt
731 // in configuring entity expansion, then skip this step. This
732 // saves the work of repeated exception handling on each parse.
733 if (!skipNextEntityExpandConfig) {
734 boolean success = false;
735
736 // Try setting the DeclHandler if entity expansion is off
737 if (!expand) {
738 try {
739 parser.setProperty("http://xml.org/sax/properties/declaration-handler",
740 contentHandler);
741 success = true;
742 } catch (SAXNotSupportedException e) {
743 // No lexical reporting available
744 } catch (SAXNotRecognizedException e) {
745 // No lexical reporting available
746 }
747 }
748
749 /* If unable to configure this property and fastReconfigure is
750 * enabled, then setup to avoid this code path entirely next time.
751 */
752 if (!success && fastReconfigure) {
753 skipNextEntityExpandConfig = true;
754 }
755 }
756 }
757
758 private void setFeaturesAndProperties(XMLReader parser,
759 boolean coreFeatures)
760 throws JDOMException {
761 // Set any user-specified features on the parser.
762 Iterator iter = features.keySet().iterator();
763 while (iter.hasNext()) {
764 String name = (String)iter.next();
765 Boolean value = (Boolean)features.get(name);
766 internalSetFeature(parser, name, value.booleanValue(), name);
767 }
768
769 // Set any user-specified properties on the parser.
770 iter = properties.keySet().iterator();
771 while (iter.hasNext()) {
772 String name = (String)iter.next();
773 internalSetProperty(parser, name, properties.get(name), name);
774 }
775
776 if (coreFeatures) {
777 // Set validation.
778 try {
779 internalSetFeature(parser,
780 "http://xml.org/sax/features/validation",
781 validate, "Validation");
782 } catch (JDOMException e) {
783 // If validation is not supported, and the user is requesting
784 // that we don't validate, that's fine - don't throw an
785 // exception.
786 if (validate)
787 throw e;
788 }
789
790 // Setup some namespace features.
791 internalSetFeature(parser,
792 "http://xml.org/sax/features/namespaces",
793 true, "Namespaces");
794 internalSetFeature(parser,
795 "http://xml.org/sax/features/namespace-prefixes",
796 true, "Namespace prefixes");
797 }
798
799 // Set entity expansion
800 // Note SAXHandler can work regardless of how this is set, but when
801 // entity expansion it's worth it to try to tell the parser not to
802 // even bother with external general entities.
803 // Apparently no parsers yet support this feature.
804 // XXX It might make sense to setEntityResolver() with a resolver
805 // that simply ignores external general entities
806 try {
807 if (parser.getFeature("http://xml.org/sax/features/external-general-entities") != expand) {
808 parser.setFeature("http://xml.org/sax/features/external-general-entities", expand);
809 }
810 }
811 catch (SAXNotRecognizedException e) { /* Ignore... */ }
812 catch (SAXNotSupportedException e) { /* Ignore... */ }
813 }
814
815 /**
816 * Tries to set a feature on the parser. If the feature cannot be set,
817 * throws a JDOMException describing the problem.
818 */
819 private void internalSetFeature(XMLReader parser, String feature,
820 boolean value, String displayName) throws JDOMException {
821 try {
822 parser.setFeature(feature, value);
823 } catch (SAXNotSupportedException e) {
824 throw new JDOMException(
825 displayName + " feature not supported for SAX driver " + parser.getClass().getName());
826 } catch (SAXNotRecognizedException e) {
827 throw new JDOMException(
828 displayName + " feature not recognized for SAX driver " + parser.getClass().getName());
829 }
830 }
831
832 /**
833 * <p>
834 * Tries to set a property on the parser. If the property cannot be set,
835 * throws a JDOMException describing the problem.
836 * </p>
837 */
838 private void internalSetProperty(XMLReader parser, String property,
839 Object value, String displayName) throws JDOMException {
840 try {
841 parser.setProperty(property, value);
842 } catch (SAXNotSupportedException e) {
843 throw new JDOMException(
844 displayName + " property not supported for SAX driver " + parser.getClass().getName());
845 } catch (SAXNotRecognizedException e) {
846 throw new JDOMException(
847 displayName + " property not recognized for SAX driver " + parser.getClass().getName());
848 }
849 }
850
851 /**
852 * <p>
853 * This builds a document from the supplied
854 * input stream.
855 * </p>
856 *
857 * @param in <code>InputStream</code> to read from
858 * @return <code>Document</code> resultant Document object
859 * @throws JDOMException when errors occur in parsing
860 * @throws IOException when an I/O error prevents a document
861 * from being fully parsed.
862 */
863 public Document build(InputStream in)
864 throws JDOMException, IOException {
865 return build(new InputSource(in));
866 }
867
868 /**
869 * <p>
870 * This builds a document from the supplied
871 * filename.
872 * </p>
873 *
874 * @param file <code>File</code> to read from
875 * @return <code>Document</code> resultant Document object
876 * @throws JDOMException when errors occur in parsing
877 * @throws IOException when an I/O error prevents a document
878 * from being fully parsed
879 */
880 public Document build(File file)
881 throws JDOMException, IOException {
882 try {
883 URL url = fileToURL(file);
884 return build(url);
885 } catch (MalformedURLException e) {
886 throw new JDOMException("Error in building", e);
887 }
888 }
889
890 /**
891 * <p>
892 * This builds a document from the supplied
893 * URL.
894 * </p>
895 *
896 * @param url <code>URL</code> to read from.
897 * @return <code>Document</code> - resultant Document object.
898 * @throws JDOMException when errors occur in parsing
899 * @throws IOException when an I/O error prevents a document
900 * from being fully parsed.
901 */
902 public Document build(URL url)
903 throws JDOMException, IOException {
904 String systemID = url.toExternalForm();
905 return build(new InputSource(systemID));
906 }
907
908 /**
909 * <p>
910 * This builds a document from the supplied
911 * input stream.
912 * </p>
913 *
914 * @param in <code>InputStream</code> to read from.
915 * @param systemId base for resolving relative URIs
916 * @return <code>Document</code> resultant Document object
917 * @throws JDOMException when errors occur in parsing
918 * @throws IOException when an I/O error prevents a document
919 * from being fully parsed
920 */
921 public Document build(InputStream in, String systemId)
922 throws JDOMException, IOException {
923
924 InputSource src = new InputSource(in);
925 src.setSystemId(systemId);
926 return build(src);
927 }
928
929 /**
930 * <p>
931 * This builds a document from the supplied
932 * Reader. It's the programmer's responsibility to make sure
933 * the reader matches the encoding of the file. It's often easier
934 * and safer to use an InputStream rather than a Reader, and to let the
935 * parser auto-detect the encoding from the XML declaration.
936 * </p>
937 *
938 * @param characterStream <code>Reader</code> to read from
939 * @return <code>Document</code> resultant Document object
940 * @throws JDOMException when errors occur in parsing
941 * @throws IOException when an I/O error prevents a document
942 * from being fully parsed
943 */
944 public Document build(Reader characterStream)
945 throws JDOMException, IOException {
946 return build(new InputSource(characterStream));
947 }
948
949 /**
950 * <p>
951 * This builds a document from the supplied
952 * Reader. It's the programmer's responsibility to make sure
953 * the reader matches the encoding of the file. It's often easier
954 * and safer to use an InputStream rather than a Reader, and to let the
955 * parser auto-detect the encoding from the XML declaration.
956 * </p>
957 *
958 * @param characterStream <code>Reader</code> to read from.
959 * @param systemId base for resolving relative URIs
960 * @return <code>Document</code> resultant Document object
961 * @throws JDOMException when errors occur in parsing
962 * @throws IOException when an I/O error prevents a document
963 * from being fully parsed
964 */
965 public Document build(Reader characterStream, String systemId)
966 throws JDOMException, IOException {
967
968 InputSource src = new InputSource(characterStream);
969 src.setSystemId(systemId);
970 return build(src);
971 }
972
973 /**
974 * <p>
975 * This builds a document from the supplied
976 * URI.
977 * </p>
978 * @param systemId URI for the input
979 * @return <code>Document</code> resultant Document object
980 * @throws JDOMException when errors occur in parsing
981 * @throws IOException when an I/O error prevents a document
982 * from being fully parsed
983 */
984 public Document build(String systemId)
985 throws JDOMException, IOException {
986 return build(new InputSource(systemId));
987 }
988
989 // /**
990 // * Imitation of File.toURL(), a JDK 1.2 method, reimplemented
991 // * here to work with JDK 1.1.
992 // *
993 // * @see java.io.File
994 // *
995 // * @param f the file to convert
996 // * @return the file path converted to a file: URL
997 // */
998 // protected URL fileToURL(File f) throws MalformedURLException {
999 // String path = f.getAbsolutePath();
1000 // if (File.separatorChar != '/') {
1001 // path = path.replace(File.separatorChar, '/');
1002 // }
1003 // if (!path.startsWith("/")) {
1004 // path = "/" + path;
1005 // }
1006 // if (!path.endsWith("/") && f.isDirectory()) {
1007 // path = path + "/";
1008 // }
1009 // return new URL("file", "", path);
1010 // }
1011
1012 /** Custom File.toUrl() implementation to handle special chars in file names
1013 *
1014 * @param file file object whose path will be converted
1015 * @return URL form of the file, with special characters handled
1016 * @throws MalformedURLException if there's a problem constructing a URL
1017 */
1018 private static URL fileToURL(File file) throws MalformedURLException {
1019 StringBuffer buffer = new StringBuffer();
1020 String path = file.getAbsolutePath();
1021
1022 // Convert non-URL style file separators
1023 if (File.separatorChar != '/') {
1024 path = path.replace(File.separatorChar, '/');
1025 }
1026
1027 // Make sure it starts at root
1028 if (!path.startsWith("/")) {
1029 buffer.append('/');
1030 }
1031
1032 // Copy, converting URL special characters as we go
1033 int len = path.length();
1034 for (int i = 0; i < len; i++) {
1035 char c = path.charAt(i);
1036 if (c == ' ')
1037 buffer.append("%20");
1038 else if (c == '#')
1039 buffer.append("%23");
1040 else if (c == '%')
1041 buffer.append("%25");
1042 else if (c == '&')
1043 buffer.append("%26");
1044 else if (c == ';')
1045 buffer.append("%3B");
1046 else if (c == '<')
1047 buffer.append("%3C");
1048 else if (c == '=')
1049 buffer.append("%3D");
1050 else if (c == '>')
1051 buffer.append("%3E");
1052 else if (c == '?')
1053 buffer.append("%3F");
1054 else if (c == '~')
1055 buffer.append("%7E");
1056 else
1057 buffer.append(c);
1058 }
1059
1060 // Make sure directories end with slash
1061 if (!path.endsWith("/") && file.isDirectory()) {
1062 buffer.append('/');
1063 }
1064
1065 // Return URL
1066 return new URL("file", "", buffer.toString());
1067 }
1068
1069 /**
1070 * Returns whether or not entities are being expanded into normal text
1071 * content.
1072 *
1073 * @return whether entities are being expanded
1074 */
1075 public boolean getExpandEntities() {
1076 return expand;
1077 }
1078
1079 /**
1080 * <p>
1081 * This sets whether or not to expand entities for the builder.
1082 * A true means to expand entities as normal content. A false means to
1083 * leave entities unexpanded as <code>EntityRef</code> objects. The
1084 * default is true.
1085 * </p>
1086 * <p>
1087 * When this setting is false, the internal DTD subset is retained; when
1088 * this setting is true, the internal DTD subset is not retained.
1089 * </p>
1090 * <p>
1091 * Note that Xerces (at least up to 1.4.4) has a bug where entities
1092 * in attribute values will be misreported if this flag is turned off,
1093 * resulting in entities to appear within element content. When turning
1094 * entity expansion off either avoid entities in attribute values, or
1095 * use another parser like Crimson.
1096 * http://nagoya.apache.org/bugzilla/show_bug.cgi?id=6111
1097 * </p>
1098 *
1099 * @param expand <code>boolean</code> indicating whether entity expansion
1100 * should occur.
1101 */
1102 public void setExpandEntities(boolean expand) {
1103 this.expand = expand;
1104 }
1105 }