0
|
1 /*--
|
|
2
|
|
3 $Id: SAXBuilder.java,v 1.93 2009/07/23 06:26:26 jhunter Exp $
|
|
4
|
|
5 Copyright (C) 2000-2007 Jason Hunter & Brett McLaughlin.
|
|
6 All rights reserved.
|
|
7
|
|
8 Redistribution and use in source and binary forms, with or without
|
|
9 modification, are permitted provided that the following conditions
|
|
10 are met:
|
|
11
|
|
12 1. Redistributions of source code must retain the above copyright
|
|
13 notice, this list of conditions, and the following disclaimer.
|
|
14
|
|
15 2. Redistributions in binary form must reproduce the above copyright
|
|
16 notice, this list of conditions, and the disclaimer that follows
|
|
17 these conditions in the documentation and/or other materials
|
|
18 provided with the distribution.
|
|
19
|
|
20 3. The name "JDOM" must not be used to endorse or promote products
|
|
21 derived from this software without prior written permission. For
|
|
22 written permission, please contact <request_AT_jdom_DOT_org>.
|
|
23
|
|
24 4. Products derived from this software may not be called "JDOM", nor
|
|
25 may "JDOM" appear in their name, without prior written permission
|
|
26 from the JDOM Project Management <request_AT_jdom_DOT_org>.
|
|
27
|
|
28 In addition, we request (but do not require) that you include in the
|
|
29 end-user documentation provided with the redistribution and/or in the
|
|
30 software itself an acknowledgement equivalent to the following:
|
|
31 "This product includes software developed by the
|
|
32 JDOM Project (http://www.jdom.org/)."
|
|
33 Alternatively, the acknowledgment may be graphical using the logos
|
|
34 available at http://www.jdom.org/images/logos.
|
|
35
|
|
36 THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
|
37 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
38 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
39 DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
|
|
40 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
41 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
42 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
|
43 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
44 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
45 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
|
46 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
47 SUCH DAMAGE.
|
|
48
|
|
49 This software consists of voluntary contributions made by many
|
|
50 individuals on behalf of the JDOM Project and was originally
|
|
51 created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
|
|
52 Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information
|
|
53 on the JDOM Project, please see <http://www.jdom.org/>.
|
|
54
|
|
55 */
|
|
56
|
|
57 package org.jdom.input;
|
|
58
|
|
59 import java.io.*;
|
|
60 import java.lang.reflect.*;
|
|
61 import java.net.*;
|
|
62 import java.util.*;
|
|
63
|
|
64 import org.jdom.*;
|
|
65
|
|
66 import org.xml.sax.*;
|
|
67 import org.xml.sax.helpers.XMLReaderFactory;
|
|
68
|
|
69 /**
|
|
70 * Builds a JDOM document from files, streams, readers, URLs, or a SAX {@link
|
|
71 * org.xml.sax.InputSource} instance using a SAX parser. The builder uses a
|
|
72 * third-party SAX parser (chosen by JAXP by default, or you can choose
|
|
73 * manually) to handle the parsing duties and simply listens to the SAX events
|
|
74 * to construct a document. Details which SAX does not provide, such as
|
|
75 * whitespace outside the root element, are not represented in the JDOM
|
|
76 * document. Information about SAX can be found at <a
|
|
77 * href="http://www.saxproject.org">http://www.saxproject.org</a>.
|
|
78 * <p>
|
|
79 * Known issues: Relative paths for a {@link DocType} or {@link EntityRef} may
|
|
80 * be converted by the SAX parser into absolute paths.
|
|
81 *
|
|
82 * @version $Revision: 1.93 $, $Date: 2009/07/23 06:26:26 $
|
|
83 * @author Jason Hunter
|
|
84 * @author Brett McLaughlin
|
|
85 * @author Dan Schaffer
|
|
86 * @author Philip Nelson
|
|
87 * @author Alex Rosen
|
|
88 */
|
|
89 public class SAXBuilder {
|
|
90
|
|
91 private static final String CVS_ID =
|
|
92 "@(#) $RCSfile: SAXBuilder.java,v $ $Revision: 1.93 $ $Date: 2009/07/23 06:26:26 $ $Name: jdom_1_1_1 $";
|
|
93
|
|
94 /**
|
|
95 * Default parser class to use. This is used when no other parser
|
|
96 * is given and JAXP isn't available.
|
|
97 */
|
|
98 private static final String DEFAULT_SAX_DRIVER =
|
|
99 "org.apache.xerces.parsers.SAXParser";
|
|
100
|
|
101 /** Whether validation should occur */
|
|
102 private boolean validate;
|
|
103
|
|
104 /** Whether expansion of entities should occur */
|
|
105 private boolean expand = true;
|
|
106
|
|
107 /** Adapter class to use */
|
|
108 private String saxDriverClass;
|
|
109
|
|
110 /** ErrorHandler class to use */
|
|
111 private ErrorHandler saxErrorHandler = null;
|
|
112
|
|
113 /** EntityResolver class to use */
|
|
114 private EntityResolver saxEntityResolver = null;
|
|
115
|
|
116 /** DTDHandler class to use */
|
|
117 private DTDHandler saxDTDHandler = null;
|
|
118
|
|
119 /** XMLFilter instance to use */
|
|
120 private XMLFilter saxXMLFilter = null;
|
|
121
|
|
122 /** The factory for creating new JDOM objects */
|
|
123 private JDOMFactory factory = new DefaultJDOMFactory();
|
|
124
|
|
125 /** Whether to ignore ignorable whitespace */
|
|
126 private boolean ignoringWhite = false;
|
|
127
|
|
128 /** Whether to ignore all whitespace content */
|
|
129 private boolean ignoringBoundaryWhite = false;
|
|
130
|
|
131 /** User-specified features to be set on the SAX parser */
|
|
132 private HashMap features = new HashMap(5);
|
|
133
|
|
134 /** User-specified properties to be set on the SAX parser */
|
|
135 private HashMap properties = new HashMap(5);
|
|
136
|
|
137 /** Whether to use fast parser reconfiguration */
|
|
138 private boolean fastReconfigure = false;
|
|
139
|
|
140 /** Whether to try lexical reporting in fast parser reconfiguration */
|
|
141 private boolean skipNextLexicalReportingConfig = false;
|
|
142
|
|
143 /** Whether to to try entity expansion in fast parser reconfiguration */
|
|
144 private boolean skipNextEntityExpandConfig = false;
|
|
145
|
|
146 /**
|
|
147 * Whether parser reuse is allowed.
|
|
148 * <p>Default: <code>true</code></p>
|
|
149 */
|
|
150 private boolean reuseParser = true;
|
|
151
|
|
152 /** The current SAX parser, if parser reuse has been activated. */
|
|
153 private XMLReader saxParser = null;
|
|
154
|
|
/**
 * Creates a non-validating SAXBuilder. No SAX driver is named, so the
 * parser is located through JAXP first and through the built-in default
 * driver otherwise.
 */
public SAXBuilder() {
    this(false);
}
|
|
163
|
|
/**
 * Creates a SAXBuilder with the given validation setting. No SAX driver
 * is named, so the parser is located through JAXP first and through the
 * built-in default driver otherwise.
 *
 * @param validate <code>true</code> if the underlying parser should
 *                 validate, <code>false</code> otherwise.
 */
public SAXBuilder(boolean validate) {
    this.validate = validate;
}
|
|
176
|
|
/**
 * Creates a non-validating SAXBuilder that parses with the named
 * SAX driver.
 *
 * @param saxDriverClass class name of the SAX driver to parse with.
 */
public SAXBuilder(String saxDriverClass) {
    this(saxDriverClass, false);
}
|
|
187
|
|
/**
 * Creates a SAXBuilder that parses with the named SAX driver and the
 * given validation setting.
 *
 * @param saxDriverClass class name of the SAX driver to parse with.
 * @param validate       <code>true</code> if the underlying parser should
 *                       validate, <code>false</code> otherwise.
 */
public SAXBuilder(String saxDriverClass, boolean validate) {
    this.validate = validate;
    this.saxDriverClass = saxDriverClass;
}
|
|
202
|
|
203 /**
|
|
204 * Returns the driver class assigned in the constructor, or null if none.
|
|
205 *
|
|
206 * @return the driver class assigned in the constructor
|
|
207 */
|
|
208 public String getDriverClass() {
|
|
209 return saxDriverClass;
|
|
210 }
|
|
211
|
|
212 /**
|
|
213 * Returns the current {@link org.jdom.JDOMFactory} in use.
|
|
214 * @return the factory in use
|
|
215 */
|
|
216 public JDOMFactory getFactory() {
|
|
217 return factory;
|
|
218 }
|
|
219
|
|
220 /**
|
|
221 * This sets a custom JDOMFactory for the builder. Use this to build
|
|
222 * the tree with your own subclasses of the JDOM classes.
|
|
223 *
|
|
224 * @param factory <code>JDOMFactory</code> to use
|
|
225 */
|
|
226 public void setFactory(JDOMFactory factory) {
|
|
227 this.factory = factory;
|
|
228 }
|
|
229
|
|
230 /**
|
|
231 * Returns whether validation is to be performed during the build.
|
|
232 *
|
|
233 * @return whether validation is to be performed during the build
|
|
234 */
|
|
235 public boolean getValidation() {
|
|
236 return validate;
|
|
237 }
|
|
238
|
|
239 /**
|
|
240 * This sets validation for the builder.
|
|
241 *
|
|
242 * @param validate <code>boolean</code> indicating whether validation
|
|
243 * should occur.
|
|
244 */
|
|
245 public void setValidation(boolean validate) {
|
|
246 this.validate = validate;
|
|
247 }
|
|
248
|
|
249 /**
|
|
250 * Returns the {@link ErrorHandler} assigned, or null if none.
|
|
251 * @return the ErrorHandler assigned, or null if none
|
|
252 */
|
|
253 public ErrorHandler getErrorHandler() {
|
|
254 return saxErrorHandler;
|
|
255 }
|
|
256
|
|
257 /**
|
|
258 * This sets custom ErrorHandler for the <code>Builder</code>.
|
|
259 *
|
|
260 * @param errorHandler <code>ErrorHandler</code>
|
|
261 */
|
|
262 public void setErrorHandler(ErrorHandler errorHandler) {
|
|
263 saxErrorHandler = errorHandler;
|
|
264 }
|
|
265
|
|
266 /**
|
|
267 * Returns the {@link EntityResolver} assigned, or null if none.
|
|
268 *
|
|
269 * @return the EntityResolver assigned
|
|
270 */
|
|
271 public EntityResolver getEntityResolver() {
|
|
272 return saxEntityResolver;
|
|
273 }
|
|
274
|
|
275 /**
|
|
276 * This sets custom EntityResolver for the <code>Builder</code>.
|
|
277 *
|
|
278 * @param entityResolver <code>EntityResolver</code>
|
|
279 */
|
|
280 public void setEntityResolver(EntityResolver entityResolver) {
|
|
281 saxEntityResolver = entityResolver;
|
|
282 }
|
|
283
|
|
284 /**
|
|
285 * Returns the {@link DTDHandler} assigned, or null if none.
|
|
286 *
|
|
287 * @return the DTDHandler assigned
|
|
288 */
|
|
289 public DTDHandler getDTDHandler() {
|
|
290 return saxDTDHandler;
|
|
291 }
|
|
292
|
|
293 /**
|
|
294 * This sets custom DTDHandler for the <code>Builder</code>.
|
|
295 *
|
|
296 * @param dtdHandler <code>DTDHandler</code>
|
|
297 */
|
|
298 public void setDTDHandler(DTDHandler dtdHandler) {
|
|
299 saxDTDHandler = dtdHandler;
|
|
300 }
|
|
301
|
|
302 /**
|
|
303 * Returns the {@link XMLFilter} used during parsing, or null if none.
|
|
304 *
|
|
305 * @return the XMLFilter used during parsing
|
|
306 */
|
|
307 public XMLFilter getXMLFilter() {
|
|
308 return saxXMLFilter;
|
|
309 }
|
|
310
|
|
311 /**
|
|
312 * This sets a custom {@link org.xml.sax.XMLFilter} for the builder.
|
|
313 *
|
|
314 * @param xmlFilter the filter to use
|
|
315 */
|
|
316 public void setXMLFilter(XMLFilter xmlFilter) {
|
|
317 saxXMLFilter = xmlFilter;
|
|
318 }
|
|
319
|
|
320 /**
|
|
321 * Returns whether element content whitespace is to be ignored during the
|
|
322 * build.
|
|
323 *
|
|
324 * @return whether element content whitespace is to be ignored during the
|
|
325 * build
|
|
326 */
|
|
327 public boolean getIgnoringElementContentWhitespace() {
|
|
328 return ignoringWhite;
|
|
329 }
|
|
330
|
|
331 /**
|
|
332 * Specifies whether or not the parser should elminate whitespace in
|
|
333 * element content (sometimes known as "ignorable whitespace") when
|
|
334 * building the document. Only whitespace which is contained within
|
|
335 * element content that has an element only content model will be
|
|
336 * eliminated (see XML Rec 3.2.1). For this setting to take effect
|
|
337 * requires that validation be turned on. The default value of this
|
|
338 * setting is <code>false</code>.
|
|
339 *
|
|
340 * @param ignoringWhite Whether to ignore ignorable whitespace
|
|
341 */
|
|
342 public void setIgnoringElementContentWhitespace(boolean ignoringWhite) {
|
|
343 this.ignoringWhite = ignoringWhite;
|
|
344 }
|
|
345
|
|
346 /**
|
|
347 * Returns whether or not the parser will elminate element content
|
|
348 * containing only whitespace.
|
|
349 *
|
|
350 * @return <code>boolean</code> - whether only whitespace content will
|
|
351 * be ignored during build.
|
|
352 *
|
|
353 * @see #setIgnoringBoundaryWhitespace
|
|
354 */
|
|
355 public boolean getIgnoringBoundaryWhitespace() {
|
|
356 return ignoringBoundaryWhite;
|
|
357 }
|
|
358
|
|
359 /**
|
|
360 * Specifies whether or not the parser should elminate boundary whitespace,
|
|
361 * a term that indicates whitespace-only text between element tags. This
|
|
362 * feature is a lot like {@link #setIgnoringElementContentWhitespace(boolean)}
|
|
363 * but this feature is more aggressive and doesn't require validation be
|
|
364 * turned on. The {@link #setIgnoringElementContentWhitespace(boolean)}
|
|
365 * call impacts the SAX parse process while this method impacts the JDOM
|
|
366 * build process, so it can be beneficial to turn both on for efficiency.
|
|
367 * For implementation efficiency, this method actually removes all
|
|
368 * whitespace-only text() nodes. That can, in some cases (like beteween an
|
|
369 * element tag and a comment), include whitespace that isn't just boundary
|
|
370 * whitespace. The default is <code>false</code>.
|
|
371 *
|
|
372 * @param ignoringBoundaryWhite Whether to ignore whitespace-only text
|
|
373 * noes
|
|
374 */
|
|
375 public void setIgnoringBoundaryWhitespace(boolean ignoringBoundaryWhite) {
|
|
376 this.ignoringBoundaryWhite = ignoringBoundaryWhite;
|
|
377 }
|
|
378
|
|
379 /**
|
|
380 * Returns whether the contained SAX parser instance is reused across
|
|
381 * multiple parses. The default is true.
|
|
382 *
|
|
383 * @return whether the contained SAX parser instance is reused across
|
|
384 * multiple parses
|
|
385 */
|
|
386 public boolean getReuseParser() {
|
|
387 return reuseParser;
|
|
388 }
|
|
389
|
|
390 /**
|
|
391 * Specifies whether this builder shall reuse the same SAX parser
|
|
392 * when performing subsequent parses or allocate a new parser for
|
|
393 * each parse. The default value of this setting is
|
|
394 * <code>true</code> (parser reuse).
|
|
395 * <p>
|
|
396 * <strong>Note</strong>: As SAX parser instances are not thread safe,
|
|
397 * the parser reuse feature should not be used with SAXBuilder instances
|
|
398 * shared among threads.</p>
|
|
399 *
|
|
400 * @param reuseParser Whether to reuse the SAX parser.
|
|
401 */
|
|
402 public void setReuseParser(boolean reuseParser) {
|
|
403 this.reuseParser = reuseParser;
|
|
404 this.saxParser = null;
|
|
405 }
|
|
406
|
|
407 /**
|
|
408 * Specifies whether this builder will do fast reconfiguration of the
|
|
409 * underlying SAX parser when reuseParser is true. This improves
|
|
410 * performance in cases where SAXBuilders are reused and lots of small
|
|
411 * documents are frequently parsed. This avoids attempting to set features
|
|
412 * on the SAX parser each time build() is called which result in
|
|
413 * SaxNotRecognizedExceptions. This should ONLY be set for builders where
|
|
414 * this specific case is an issue. The default value of this setting is
|
|
415 * <code>false</code> (no fast reconfiguration). If reuseParser is false,
|
|
416 * calling this has no effect.
|
|
417 *
|
|
418 * @param fastReconfigure Whether to do a fast reconfiguration of the parser
|
|
419 */
|
|
420 public void setFastReconfigure(boolean fastReconfigure) {
|
|
421 if (this.reuseParser) {
|
|
422 this.fastReconfigure = fastReconfigure;
|
|
423 }
|
|
424 }
|
|
425
|
|
426 /**
|
|
427 * This sets a feature on the SAX parser. See the SAX documentation for .
|
|
428 * more information.
|
|
429 * </p>
|
|
430 * <p>
|
|
431 * NOTE: SAXBuilder requires that some particular features of the SAX parser be
|
|
432 * set up in certain ways for it to work properly. The list of such features
|
|
433 * may change in the future. Therefore, the use of this method may cause
|
|
434 * parsing to break, and even if it doesn't break anything today it might
|
|
435 * break parsing in a future JDOM version, because what JDOM parsers require
|
|
436 * may change over time. Use with caution.
|
|
437 * </p>
|
|
438 *
|
|
439 * @param name The feature name, which is a fully-qualified URI.
|
|
440 * @param value The requested state of the feature (true or false).
|
|
441 */
|
|
442 public void setFeature(String name, boolean value) {
|
|
443 // Save the specified feature for later.
|
|
444 features.put(name, value ? Boolean.TRUE : Boolean.FALSE);
|
|
445 }
|
|
446
|
|
447 /**
|
|
448 * This sets a property on the SAX parser. See the SAX documentation for
|
|
449 * more information.
|
|
450 * <p>
|
|
451 * NOTE: SAXBuilder requires that some particular properties of the SAX parser be
|
|
452 * set up in certain ways for it to work properly. The list of such properties
|
|
453 * may change in the future. Therefore, the use of this method may cause
|
|
454 * parsing to break, and even if it doesn't break anything today it might
|
|
455 * break parsing in a future JDOM version, because what JDOM parsers require
|
|
456 * may change over time. Use with caution.
|
|
457 * </p>
|
|
458 *
|
|
459 * @param name The property name, which is a fully-qualified URI.
|
|
460 * @param value The requested value for the property.
|
|
461 */
|
|
462 public void setProperty(String name, Object value) {
|
|
463 // Save the specified property for later.
|
|
464 properties.put(name, value);
|
|
465 }
|
|
466
|
|
467 /**
|
|
468 * This builds a document from the supplied
|
|
469 * input source.
|
|
470 *
|
|
471 * @param in <code>InputSource</code> to read from
|
|
472 * @return <code>Document</code> resultant Document object
|
|
473 * @throws JDOMException when errors occur in parsing
|
|
474 * @throws IOException when an I/O error prevents a document
|
|
475 * from being fully parsed
|
|
476 */
|
|
477 public Document build(InputSource in)
|
|
478 throws JDOMException, IOException {
|
|
479 SAXHandler contentHandler = null;
|
|
480
|
|
481 try {
|
|
482 // Create and configure the content handler.
|
|
483 contentHandler = createContentHandler();
|
|
484 configureContentHandler(contentHandler);
|
|
485
|
|
486 XMLReader parser = this.saxParser;
|
|
487 if (parser == null) {
|
|
488 // Create and configure the parser.
|
|
489 parser = createParser();
|
|
490
|
|
491 // Install optional filter
|
|
492 if (saxXMLFilter != null) {
|
|
493 // Connect filter chain to parser
|
|
494 XMLFilter root = saxXMLFilter;
|
|
495 while (root.getParent() instanceof XMLFilter) {
|
|
496 root = (XMLFilter)root.getParent();
|
|
497 }
|
|
498 root.setParent(parser);
|
|
499
|
|
500 // Read from filter
|
|
501 parser = saxXMLFilter;
|
|
502 }
|
|
503
|
|
504 // Configure parser
|
|
505 configureParser(parser, contentHandler);
|
|
506
|
|
507 if (reuseParser) {
|
|
508 this.saxParser = parser;
|
|
509 }
|
|
510 }
|
|
511 else {
|
|
512 // Reset content handler as SAXHandler instances cannot
|
|
513 // be reused
|
|
514 configureParser(parser, contentHandler);
|
|
515 }
|
|
516
|
|
517 // Parse the document.
|
|
518 parser.parse(in);
|
|
519
|
|
520 return contentHandler.getDocument();
|
|
521 }
|
|
522 catch (SAXParseException e) {
|
|
523 Document doc = contentHandler.getDocument();
|
|
524 if (doc.hasRootElement() == false) {
|
|
525 doc = null;
|
|
526 }
|
|
527
|
|
528 String systemId = e.getSystemId();
|
|
529 if (systemId != null) {
|
|
530 throw new JDOMParseException("Error on line " +
|
|
531 e.getLineNumber() + " of document " + systemId, e, doc);
|
|
532 } else {
|
|
533 throw new JDOMParseException("Error on line " +
|
|
534 e.getLineNumber(), e, doc);
|
|
535 }
|
|
536 }
|
|
537 catch (SAXException e) {
|
|
538 throw new JDOMParseException("Error in building: " +
|
|
539 e.getMessage(), e, contentHandler.getDocument());
|
|
540 }
|
|
541 finally {
|
|
542 // Explicitly nullify the handler to encourage GC
|
|
543 // It's a stack var so this shouldn't be necessary, but it
|
|
544 // seems to help on some JVMs
|
|
545 contentHandler = null;
|
|
546 }
|
|
547 }
|
|
548
|
|
549 /**
|
|
550 * This creates the SAXHandler that will be used to build the Document.
|
|
551 *
|
|
552 * @return <code>SAXHandler</code> - resultant SAXHandler object.
|
|
553 */
|
|
554 protected SAXHandler createContentHandler() {
|
|
555 SAXHandler contentHandler = new SAXHandler(factory);
|
|
556 return contentHandler;
|
|
557 }
|
|
558
|
|
559 /**
|
|
560 * This configures the SAXHandler that will be used to build the Document.
|
|
561 * <p>
|
|
562 * The default implementation simply passes through some configuration
|
|
563 * settings that were set on the SAXBuilder: setExpandEntities() and
|
|
564 * setIgnoringElementContentWhitespace().
|
|
565 * </p>
|
|
566 * @param contentHandler The SAXHandler to configure
|
|
567 */
|
|
568 protected void configureContentHandler(SAXHandler contentHandler) {
|
|
569 // Setup pass through behavior
|
|
570 contentHandler.setExpandEntities(expand);
|
|
571 contentHandler.setIgnoringElementContentWhitespace(ignoringWhite);
|
|
572 contentHandler.setIgnoringBoundaryWhitespace(ignoringBoundaryWhite);
|
|
573 }
|
|
574
|
|
575 /**
|
|
576 * This creates the XMLReader to be used for reading the XML document.
|
|
577 * <p>
|
|
578 * The default behavior is to (1) use the saxDriverClass, if it has been
|
|
579 * set, (2) try to obtain a parser from JAXP, if it is available, and
|
|
580 * (3) if all else fails, use a hard-coded default parser (currently
|
|
581 * the Xerces parser). Subclasses may override this method to determine
|
|
582 * the parser to use in a different way.
|
|
583 * </p>
|
|
584 *
|
|
585 * @return <code>XMLReader</code> - resultant XMLReader object.
|
|
586 * @throws org.jdom.JDOMException
|
|
587 */
|
|
588 protected XMLReader createParser() throws JDOMException {
|
|
589 XMLReader parser = null;
|
|
590 if (saxDriverClass != null) {
|
|
591 // The user knows that they want to use a particular class
|
|
592 try {
|
|
593 parser = XMLReaderFactory.createXMLReader(saxDriverClass);
|
|
594
|
|
595 // Configure parser
|
|
596 setFeaturesAndProperties(parser, true);
|
|
597 }
|
|
598 catch (SAXException e) {
|
|
599 throw new JDOMException("Could not load " + saxDriverClass, e);
|
|
600 }
|
|
601 } else {
|
|
602 // Try using JAXP...
|
|
603 // Note we need JAXP 1.1, and if JAXP 1.0 is all that's
|
|
604 // available then the getXMLReader call fails and we skip
|
|
605 // to the hard coded default parser
|
|
606 try {
|
|
607 // Get factory class and method.
|
|
608 Class factoryClass =
|
|
609 Class.forName("org.jdom.input.JAXPParserFactory");
|
|
610
|
|
611 Method createParser =
|
|
612 factoryClass.getMethod("createParser",
|
|
613 new Class[] { boolean.class, Map.class, Map.class });
|
|
614
|
|
615 // Create SAX parser.
|
|
616 parser = (XMLReader)createParser.invoke(null,
|
|
617 new Object[] { validate ? Boolean.TRUE : Boolean.FALSE,
|
|
618 features, properties });
|
|
619
|
|
620 // Configure parser
|
|
621 setFeaturesAndProperties(parser, false);
|
|
622 }
|
|
623 catch (JDOMException e) {
|
|
624 throw e;
|
|
625 }
|
|
626 catch (NoClassDefFoundError e) {
|
|
627 // The class loader failed to resolve the dependencies
|
|
628 // of org.jdom.input.JAXPParserFactory. This probably means
|
|
629 // that no JAXP parser is present in its class path.
|
|
630 // => Ignore and try allocating default SAX parser instance.
|
|
631 }
|
|
632 catch (Exception e) {
|
|
633 // Ignore and try allocating default SAX parser instance.
|
|
634 }
|
|
635 }
|
|
636
|
|
637 // Check to see if we got a parser yet, if not, try to use a
|
|
638 // hard coded default
|
|
639 if (parser == null) {
|
|
640 try {
|
|
641 parser = XMLReaderFactory.createXMLReader(DEFAULT_SAX_DRIVER);
|
|
642 // System.out.println("using default " + DEFAULT_SAX_DRIVER);
|
|
643 saxDriverClass = parser.getClass().getName();
|
|
644
|
|
645 // Configure parser
|
|
646 setFeaturesAndProperties(parser, true);
|
|
647 }
|
|
648 catch (SAXException e) {
|
|
649 throw new JDOMException("Could not load default SAX parser: "
|
|
650 + DEFAULT_SAX_DRIVER, e);
|
|
651 }
|
|
652 }
|
|
653
|
|
654 return parser;
|
|
655 }
|
|
656
|
|
657 /**
|
|
658 * This configures the XMLReader to be used for reading the XML document.
|
|
659 * <p>
|
|
660 * The default implementation sets various options on the given XMLReader,
|
|
661 * such as validation, DTD resolution, entity handlers, etc., according
|
|
662 * to the options that were set (e.g. via <code>setEntityResolver</code>)
|
|
663 * and set various SAX properties and features that are required for JDOM
|
|
664 * internals. These features may change in future releases, so change this
|
|
665 * behavior at your own risk.
|
|
666 * </p>
|
|
667 * @param parser
|
|
668 * @param contentHandler
|
|
669 * @throws org.jdom.JDOMException
|
|
670 */
|
|
671 protected void configureParser(XMLReader parser, SAXHandler contentHandler)
|
|
672 throws JDOMException {
|
|
673
|
|
674 // Setup SAX handlers.
|
|
675
|
|
676 parser.setContentHandler(contentHandler);
|
|
677
|
|
678 if (saxEntityResolver != null) {
|
|
679 parser.setEntityResolver(saxEntityResolver);
|
|
680 }
|
|
681
|
|
682 if (saxDTDHandler != null) {
|
|
683 parser.setDTDHandler(saxDTDHandler);
|
|
684 } else {
|
|
685 parser.setDTDHandler(contentHandler);
|
|
686 }
|
|
687
|
|
688 if (saxErrorHandler != null) {
|
|
689 parser.setErrorHandler(saxErrorHandler);
|
|
690 } else {
|
|
691 parser.setErrorHandler(new BuilderErrorHandler());
|
|
692 }
|
|
693
|
|
694 // If fastReconfigure is enabled and we failed in the previous attempt
|
|
695 // in configuring lexical reporting, then we skip this step. This
|
|
696 // saves the work of repeated exception handling on each parse.
|
|
697 if (!skipNextLexicalReportingConfig) {
|
|
698 boolean success = false;
|
|
699
|
|
700 try {
|
|
701 parser.setProperty("http://xml.org/sax/handlers/LexicalHandler",
|
|
702 contentHandler);
|
|
703 success = true;
|
|
704 } catch (SAXNotSupportedException e) {
|
|
705 // No lexical reporting available
|
|
706 } catch (SAXNotRecognizedException e) {
|
|
707 // No lexical reporting available
|
|
708 }
|
|
709
|
|
710 // Some parsers use alternate property for lexical handling (grr...)
|
|
711 if (!success) {
|
|
712 try {
|
|
713 parser.setProperty("http://xml.org/sax/properties/lexical-handler",
|
|
714 contentHandler);
|
|
715 success = true;
|
|
716 } catch (SAXNotSupportedException e) {
|
|
717 // No lexical reporting available
|
|
718 } catch (SAXNotRecognizedException e) {
|
|
719 // No lexical reporting available
|
|
720 }
|
|
721 }
|
|
722
|
|
723 // If unable to configure this property and fastReconfigure is
|
|
724 // enabled, then setup to avoid this code path entirely next time.
|
|
725 if (!success && fastReconfigure) {
|
|
726 skipNextLexicalReportingConfig = true;
|
|
727 }
|
|
728 }
|
|
729
|
|
730 // If fastReconfigure is enabled and we failed in the previous attempt
|
|
731 // in configuring entity expansion, then skip this step. This
|
|
732 // saves the work of repeated exception handling on each parse.
|
|
733 if (!skipNextEntityExpandConfig) {
|
|
734 boolean success = false;
|
|
735
|
|
736 // Try setting the DeclHandler if entity expansion is off
|
|
737 if (!expand) {
|
|
738 try {
|
|
739 parser.setProperty("http://xml.org/sax/properties/declaration-handler",
|
|
740 contentHandler);
|
|
741 success = true;
|
|
742 } catch (SAXNotSupportedException e) {
|
|
743 // No lexical reporting available
|
|
744 } catch (SAXNotRecognizedException e) {
|
|
745 // No lexical reporting available
|
|
746 }
|
|
747 }
|
|
748
|
|
749 /* If unable to configure this property and fastReconfigure is
|
|
750 * enabled, then setup to avoid this code path entirely next time.
|
|
751 */
|
|
752 if (!success && fastReconfigure) {
|
|
753 skipNextEntityExpandConfig = true;
|
|
754 }
|
|
755 }
|
|
756 }
|
|
757
|
|
758 private void setFeaturesAndProperties(XMLReader parser,
|
|
759 boolean coreFeatures)
|
|
760 throws JDOMException {
|
|
761 // Set any user-specified features on the parser.
|
|
762 Iterator iter = features.keySet().iterator();
|
|
763 while (iter.hasNext()) {
|
|
764 String name = (String)iter.next();
|
|
765 Boolean value = (Boolean)features.get(name);
|
|
766 internalSetFeature(parser, name, value.booleanValue(), name);
|
|
767 }
|
|
768
|
|
769 // Set any user-specified properties on the parser.
|
|
770 iter = properties.keySet().iterator();
|
|
771 while (iter.hasNext()) {
|
|
772 String name = (String)iter.next();
|
|
773 internalSetProperty(parser, name, properties.get(name), name);
|
|
774 }
|
|
775
|
|
776 if (coreFeatures) {
|
|
777 // Set validation.
|
|
778 try {
|
|
779 internalSetFeature(parser,
|
|
780 "http://xml.org/sax/features/validation",
|
|
781 validate, "Validation");
|
|
782 } catch (JDOMException e) {
|
|
783 // If validation is not supported, and the user is requesting
|
|
784 // that we don't validate, that's fine - don't throw an
|
|
785 // exception.
|
|
786 if (validate)
|
|
787 throw e;
|
|
788 }
|
|
789
|
|
790 // Setup some namespace features.
|
|
791 internalSetFeature(parser,
|
|
792 "http://xml.org/sax/features/namespaces",
|
|
793 true, "Namespaces");
|
|
794 internalSetFeature(parser,
|
|
795 "http://xml.org/sax/features/namespace-prefixes",
|
|
796 true, "Namespace prefixes");
|
|
797 }
|
|
798
|
|
799 // Set entity expansion
|
|
800 // Note SAXHandler can work regardless of how this is set, but when
|
|
801 // entity expansion it's worth it to try to tell the parser not to
|
|
802 // even bother with external general entities.
|
|
803 // Apparently no parsers yet support this feature.
|
|
804 // XXX It might make sense to setEntityResolver() with a resolver
|
|
805 // that simply ignores external general entities
|
|
806 try {
|
|
807 if (parser.getFeature("http://xml.org/sax/features/external-general-entities") != expand) {
|
|
808 parser.setFeature("http://xml.org/sax/features/external-general-entities", expand);
|
|
809 }
|
|
810 }
|
|
811 catch (SAXNotRecognizedException e) { /* Ignore... */ }
|
|
812 catch (SAXNotSupportedException e) { /* Ignore... */ }
|
|
813 }
|
|
814
|
|
815 /**
|
|
816 * Tries to set a feature on the parser. If the feature cannot be set,
|
|
817 * throws a JDOMException describing the problem.
|
|
818 */
|
|
819 private void internalSetFeature(XMLReader parser, String feature,
|
|
820 boolean value, String displayName) throws JDOMException {
|
|
821 try {
|
|
822 parser.setFeature(feature, value);
|
|
823 } catch (SAXNotSupportedException e) {
|
|
824 throw new JDOMException(
|
|
825 displayName + " feature not supported for SAX driver " + parser.getClass().getName());
|
|
826 } catch (SAXNotRecognizedException e) {
|
|
827 throw new JDOMException(
|
|
828 displayName + " feature not recognized for SAX driver " + parser.getClass().getName());
|
|
829 }
|
|
830 }
|
|
831
|
|
832 /**
|
|
833 * <p>
|
|
834 * Tries to set a property on the parser. If the property cannot be set,
|
|
835 * throws a JDOMException describing the problem.
|
|
836 * </p>
|
|
837 */
|
|
838 private void internalSetProperty(XMLReader parser, String property,
|
|
839 Object value, String displayName) throws JDOMException {
|
|
840 try {
|
|
841 parser.setProperty(property, value);
|
|
842 } catch (SAXNotSupportedException e) {
|
|
843 throw new JDOMException(
|
|
844 displayName + " property not supported for SAX driver " + parser.getClass().getName());
|
|
845 } catch (SAXNotRecognizedException e) {
|
|
846 throw new JDOMException(
|
|
847 displayName + " property not recognized for SAX driver " + parser.getClass().getName());
|
|
848 }
|
|
849 }
|
|
850
|
|
851 /**
|
|
852 * <p>
|
|
853 * This builds a document from the supplied
|
|
854 * input stream.
|
|
855 * </p>
|
|
856 *
|
|
857 * @param in <code>InputStream</code> to read from
|
|
858 * @return <code>Document</code> resultant Document object
|
|
859 * @throws JDOMException when errors occur in parsing
|
|
860 * @throws IOException when an I/O error prevents a document
|
|
861 * from being fully parsed.
|
|
862 */
|
|
863 public Document build(InputStream in)
|
|
864 throws JDOMException, IOException {
|
|
865 return build(new InputSource(in));
|
|
866 }
|
|
867
|
|
868 /**
|
|
869 * <p>
|
|
870 * This builds a document from the supplied
|
|
871 * filename.
|
|
872 * </p>
|
|
873 *
|
|
874 * @param file <code>File</code> to read from
|
|
875 * @return <code>Document</code> resultant Document object
|
|
876 * @throws JDOMException when errors occur in parsing
|
|
877 * @throws IOException when an I/O error prevents a document
|
|
878 * from being fully parsed
|
|
879 */
|
|
880 public Document build(File file)
|
|
881 throws JDOMException, IOException {
|
|
882 try {
|
|
883 URL url = fileToURL(file);
|
|
884 return build(url);
|
|
885 } catch (MalformedURLException e) {
|
|
886 throw new JDOMException("Error in building", e);
|
|
887 }
|
|
888 }
|
|
889
|
|
890 /**
|
|
891 * <p>
|
|
892 * This builds a document from the supplied
|
|
893 * URL.
|
|
894 * </p>
|
|
895 *
|
|
896 * @param url <code>URL</code> to read from.
|
|
897 * @return <code>Document</code> - resultant Document object.
|
|
898 * @throws JDOMException when errors occur in parsing
|
|
899 * @throws IOException when an I/O error prevents a document
|
|
900 * from being fully parsed.
|
|
901 */
|
|
902 public Document build(URL url)
|
|
903 throws JDOMException, IOException {
|
|
904 String systemID = url.toExternalForm();
|
|
905 return build(new InputSource(systemID));
|
|
906 }
|
|
907
|
|
908 /**
|
|
909 * <p>
|
|
910 * This builds a document from the supplied
|
|
911 * input stream.
|
|
912 * </p>
|
|
913 *
|
|
914 * @param in <code>InputStream</code> to read from.
|
|
915 * @param systemId base for resolving relative URIs
|
|
916 * @return <code>Document</code> resultant Document object
|
|
917 * @throws JDOMException when errors occur in parsing
|
|
918 * @throws IOException when an I/O error prevents a document
|
|
919 * from being fully parsed
|
|
920 */
|
|
921 public Document build(InputStream in, String systemId)
|
|
922 throws JDOMException, IOException {
|
|
923
|
|
924 InputSource src = new InputSource(in);
|
|
925 src.setSystemId(systemId);
|
|
926 return build(src);
|
|
927 }
|
|
928
|
|
929 /**
|
|
930 * <p>
|
|
931 * This builds a document from the supplied
|
|
932 * Reader. It's the programmer's responsibility to make sure
|
|
933 * the reader matches the encoding of the file. It's often easier
|
|
934 * and safer to use an InputStream rather than a Reader, and to let the
|
|
935 * parser auto-detect the encoding from the XML declaration.
|
|
936 * </p>
|
|
937 *
|
|
938 * @param characterStream <code>Reader</code> to read from
|
|
939 * @return <code>Document</code> resultant Document object
|
|
940 * @throws JDOMException when errors occur in parsing
|
|
941 * @throws IOException when an I/O error prevents a document
|
|
942 * from being fully parsed
|
|
943 */
|
|
944 public Document build(Reader characterStream)
|
|
945 throws JDOMException, IOException {
|
|
946 return build(new InputSource(characterStream));
|
|
947 }
|
|
948
|
|
949 /**
|
|
950 * <p>
|
|
951 * This builds a document from the supplied
|
|
952 * Reader. It's the programmer's responsibility to make sure
|
|
953 * the reader matches the encoding of the file. It's often easier
|
|
954 * and safer to use an InputStream rather than a Reader, and to let the
|
|
955 * parser auto-detect the encoding from the XML declaration.
|
|
956 * </p>
|
|
957 *
|
|
958 * @param characterStream <code>Reader</code> to read from.
|
|
959 * @param systemId base for resolving relative URIs
|
|
960 * @return <code>Document</code> resultant Document object
|
|
961 * @throws JDOMException when errors occur in parsing
|
|
962 * @throws IOException when an I/O error prevents a document
|
|
963 * from being fully parsed
|
|
964 */
|
|
965 public Document build(Reader characterStream, String systemId)
|
|
966 throws JDOMException, IOException {
|
|
967
|
|
968 InputSource src = new InputSource(characterStream);
|
|
969 src.setSystemId(systemId);
|
|
970 return build(src);
|
|
971 }
|
|
972
|
|
973 /**
|
|
974 * <p>
|
|
975 * This builds a document from the supplied
|
|
976 * URI.
|
|
977 * </p>
|
|
978 * @param systemId URI for the input
|
|
979 * @return <code>Document</code> resultant Document object
|
|
980 * @throws JDOMException when errors occur in parsing
|
|
981 * @throws IOException when an I/O error prevents a document
|
|
982 * from being fully parsed
|
|
983 */
|
|
984 public Document build(String systemId)
|
|
985 throws JDOMException, IOException {
|
|
986 return build(new InputSource(systemId));
|
|
987 }
|
|
988
|
|
989 // /**
|
|
990 // * Imitation of File.toURL(), a JDK 1.2 method, reimplemented
|
|
991 // * here to work with JDK 1.1.
|
|
992 // *
|
|
993 // * @see java.io.File
|
|
994 // *
|
|
995 // * @param f the file to convert
|
|
996 // * @return the file path converted to a file: URL
|
|
997 // */
|
|
998 // protected URL fileToURL(File f) throws MalformedURLException {
|
|
999 // String path = f.getAbsolutePath();
|
|
1000 // if (File.separatorChar != '/') {
|
|
1001 // path = path.replace(File.separatorChar, '/');
|
|
1002 // }
|
|
1003 // if (!path.startsWith("/")) {
|
|
1004 // path = "/" + path;
|
|
1005 // }
|
|
1006 // if (!path.endsWith("/") && f.isDirectory()) {
|
|
1007 // path = path + "/";
|
|
1008 // }
|
|
1009 // return new URL("file", "", path);
|
|
1010 // }
|
|
1011
|
|
1012 /** Custom File.toUrl() implementation to handle special chars in file names
|
|
1013 *
|
|
1014 * @param file file object whose path will be converted
|
|
1015 * @return URL form of the file, with special characters handled
|
|
1016 * @throws MalformedURLException if there's a problem constructing a URL
|
|
1017 */
|
|
1018 private static URL fileToURL(File file) throws MalformedURLException {
|
|
1019 StringBuffer buffer = new StringBuffer();
|
|
1020 String path = file.getAbsolutePath();
|
|
1021
|
|
1022 // Convert non-URL style file separators
|
|
1023 if (File.separatorChar != '/') {
|
|
1024 path = path.replace(File.separatorChar, '/');
|
|
1025 }
|
|
1026
|
|
1027 // Make sure it starts at root
|
|
1028 if (!path.startsWith("/")) {
|
|
1029 buffer.append('/');
|
|
1030 }
|
|
1031
|
|
1032 // Copy, converting URL special characters as we go
|
|
1033 int len = path.length();
|
|
1034 for (int i = 0; i < len; i++) {
|
|
1035 char c = path.charAt(i);
|
|
1036 if (c == ' ')
|
|
1037 buffer.append("%20");
|
|
1038 else if (c == '#')
|
|
1039 buffer.append("%23");
|
|
1040 else if (c == '%')
|
|
1041 buffer.append("%25");
|
|
1042 else if (c == '&')
|
|
1043 buffer.append("%26");
|
|
1044 else if (c == ';')
|
|
1045 buffer.append("%3B");
|
|
1046 else if (c == '<')
|
|
1047 buffer.append("%3C");
|
|
1048 else if (c == '=')
|
|
1049 buffer.append("%3D");
|
|
1050 else if (c == '>')
|
|
1051 buffer.append("%3E");
|
|
1052 else if (c == '?')
|
|
1053 buffer.append("%3F");
|
|
1054 else if (c == '~')
|
|
1055 buffer.append("%7E");
|
|
1056 else
|
|
1057 buffer.append(c);
|
|
1058 }
|
|
1059
|
|
1060 // Make sure directories end with slash
|
|
1061 if (!path.endsWith("/") && file.isDirectory()) {
|
|
1062 buffer.append('/');
|
|
1063 }
|
|
1064
|
|
1065 // Return URL
|
|
1066 return new URL("file", "", buffer.toString());
|
|
1067 }
|
|
1068
|
|
1069 /**
|
|
1070 * Returns whether or not entities are being expanded into normal text
|
|
1071 * content.
|
|
1072 *
|
|
1073 * @return whether entities are being expanded
|
|
1074 */
|
|
1075 public boolean getExpandEntities() {
|
|
1076 return expand;
|
|
1077 }
|
|
1078
|
|
1079 /**
|
|
1080 * <p>
|
|
1081 * This sets whether or not to expand entities for the builder.
|
|
1082 * A true means to expand entities as normal content. A false means to
|
|
1083 * leave entities unexpanded as <code>EntityRef</code> objects. The
|
|
1084 * default is true.
|
|
1085 * </p>
|
|
1086 * <p>
|
|
1087 * When this setting is false, the internal DTD subset is retained; when
|
|
1088 * this setting is true, the internal DTD subset is not retained.
|
|
1089 * </p>
|
|
1090 * <p>
|
|
1091 * Note that Xerces (at least up to 1.4.4) has a bug where entities
|
|
1092 * in attribute values will be misreported if this flag is turned off,
|
|
1093 * resulting in entities to appear within element content. When turning
|
|
1094 * entity expansion off either avoid entities in attribute values, or
|
|
1095 * use another parser like Crimson.
|
|
1096 * http://nagoya.apache.org/bugzilla/show_bug.cgi?id=6111
|
|
1097 * </p>
|
|
1098 *
|
|
1099 * @param expand <code>boolean</code> indicating whether entity expansion
|
|
1100 * should occur.
|
|
1101 */
|
|
1102 public void setExpandEntities(boolean expand) {
|
|
1103 this.expand = expand;
|
|
1104 }
|
|
1105 }
|