Mercurial > repos > pfrommolt > ngsrich
comparison NGSrich_0.5.5/src/org/jdom/input/SAXBuilder.java @ 0:89ad0a9cca52 default tip
Uploaded
author | pfrommolt |
---|---|
date | Mon, 21 Nov 2011 08:12:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:89ad0a9cca52 |
---|---|
1 /*-- | |
2 | |
3 $Id: SAXBuilder.java,v 1.93 2009/07/23 06:26:26 jhunter Exp $ | |
4 | |
5 Copyright (C) 2000-2007 Jason Hunter & Brett McLaughlin. | |
6 All rights reserved. | |
7 | |
8 Redistribution and use in source and binary forms, with or without | |
9 modification, are permitted provided that the following conditions | |
10 are met: | |
11 | |
12 1. Redistributions of source code must retain the above copyright | |
13 notice, this list of conditions, and the following disclaimer. | |
14 | |
15 2. Redistributions in binary form must reproduce the above copyright | |
16 notice, this list of conditions, and the disclaimer that follows | |
17 these conditions in the documentation and/or other materials | |
18 provided with the distribution. | |
19 | |
20 3. The name "JDOM" must not be used to endorse or promote products | |
21 derived from this software without prior written permission. For | |
22 written permission, please contact <request_AT_jdom_DOT_org>. | |
23 | |
24 4. Products derived from this software may not be called "JDOM", nor | |
25 may "JDOM" appear in their name, without prior written permission | |
26 from the JDOM Project Management <request_AT_jdom_DOT_org>. | |
27 | |
28 In addition, we request (but do not require) that you include in the | |
29 end-user documentation provided with the redistribution and/or in the | |
30 software itself an acknowledgement equivalent to the following: | |
31 "This product includes software developed by the | |
32 JDOM Project (http://www.jdom.org/)." | |
33 Alternatively, the acknowledgment may be graphical using the logos | |
34 available at http://www.jdom.org/images/logos. | |
35 | |
36 THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED | |
37 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |
38 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
39 DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT | |
40 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
41 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
42 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF | |
43 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | |
44 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
45 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT | |
46 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
47 SUCH DAMAGE. | |
48 | |
49 This software consists of voluntary contributions made by many | |
50 individuals on behalf of the JDOM Project and was originally | |
51 created by Jason Hunter <jhunter_AT_jdom_DOT_org> and | |
52 Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information | |
53 on the JDOM Project, please see <http://www.jdom.org/>. | |
54 | |
55 */ | |
56 | |
57 package org.jdom.input; | |
58 | |
59 import java.io.*; | |
60 import java.lang.reflect.*; | |
61 import java.net.*; | |
62 import java.util.*; | |
63 | |
64 import org.jdom.*; | |
65 | |
66 import org.xml.sax.*; | |
67 import org.xml.sax.helpers.XMLReaderFactory; | |
68 | |
69 /** | |
70 * Builds a JDOM document from files, streams, readers, URLs, or a SAX {@link | |
71 * org.xml.sax.InputSource} instance using a SAX parser. The builder uses a | |
72 * third-party SAX parser (chosen by JAXP by default, or you can choose | |
73 * manually) to handle the parsing duties and simply listens to the SAX events | |
74 * to construct a document. Details which SAX does not provide, such as | |
75 * whitespace outside the root element, are not represented in the JDOM | |
76 * document. Information about SAX can be found at <a | |
77 * href="http://www.saxproject.org">http://www.saxproject.org</a>. | |
78 * <p> | |
79 * Known issues: Relative paths for a {@link DocType} or {@link EntityRef} may | |
80 * be converted by the SAX parser into absolute paths. | |
81 * | |
82 * @version $Revision: 1.93 $, $Date: 2009/07/23 06:26:26 $ | |
83 * @author Jason Hunter | |
84 * @author Brett McLaughlin | |
85 * @author Dan Schaffer | |
86 * @author Philip Nelson | |
87 * @author Alex Rosen | |
88 */ | |
89 public class SAXBuilder { | |
90 | |
91 private static final String CVS_ID = | |
92 "@(#) $RCSfile: SAXBuilder.java,v $ $Revision: 1.93 $ $Date: 2009/07/23 06:26:26 $ $Name: jdom_1_1_1 $"; | |
93 | |
94 /** | |
95 * Default parser class to use. This is used when no other parser | |
96 * is given and JAXP isn't available. | |
97 */ | |
98 private static final String DEFAULT_SAX_DRIVER = | |
99 "org.apache.xerces.parsers.SAXParser"; | |
100 | |
101 /** Whether validation should occur */ | |
102 private boolean validate; | |
103 | |
104 /** Whether expansion of entities should occur */ | |
105 private boolean expand = true; | |
106 | |
107 /** Adapter class to use */ | |
108 private String saxDriverClass; | |
109 | |
110 /** ErrorHandler class to use */ | |
111 private ErrorHandler saxErrorHandler = null; | |
112 | |
113 /** EntityResolver class to use */ | |
114 private EntityResolver saxEntityResolver = null; | |
115 | |
116 /** DTDHandler class to use */ | |
117 private DTDHandler saxDTDHandler = null; | |
118 | |
119 /** XMLFilter instance to use */ | |
120 private XMLFilter saxXMLFilter = null; | |
121 | |
122 /** The factory for creating new JDOM objects */ | |
123 private JDOMFactory factory = new DefaultJDOMFactory(); | |
124 | |
125 /** Whether to ignore ignorable whitespace */ | |
126 private boolean ignoringWhite = false; | |
127 | |
128 /** Whether to ignore all whitespace content */ | |
129 private boolean ignoringBoundaryWhite = false; | |
130 | |
131 /** User-specified features to be set on the SAX parser */ | |
132 private HashMap features = new HashMap(5); | |
133 | |
134 /** User-specified properties to be set on the SAX parser */ | |
135 private HashMap properties = new HashMap(5); | |
136 | |
137 /** Whether to use fast parser reconfiguration */ | |
138 private boolean fastReconfigure = false; | |
139 | |
140 /** Whether to try lexical reporting in fast parser reconfiguration */ | |
141 private boolean skipNextLexicalReportingConfig = false; | |
142 | |
143 /** Whether to to try entity expansion in fast parser reconfiguration */ | |
144 private boolean skipNextEntityExpandConfig = false; | |
145 | |
146 /** | |
147 * Whether parser reuse is allowed. | |
148 * <p>Default: <code>true</code></p> | |
149 */ | |
150 private boolean reuseParser = true; | |
151 | |
152 /** The current SAX parser, if parser reuse has been activated. */ | |
153 private XMLReader saxParser = null; | |
154 | |
/**
 * Creates a non-validating SAXBuilder. The parser is located via JAXP
 * first, and a default SAX driver is tried if that fails.
 */
public SAXBuilder() {
    // Delegate to the boolean constructor with validation switched off.
    this(false);
}
163 | |
/**
 * Creates a SAXBuilder whose parser is located via JAXP first, with a
 * default SAX driver tried if that fails. Whether the underlying parser
 * validates is controlled by the argument.
 *
 * @param validate <code>true</code> if validation should occur,
 *                 <code>false</code> otherwise.
 */
public SAXBuilder(boolean validate) {
    // No driver class is chosen here; only the validation flag is stored.
    this.validate = validate;
}
176 | |
/**
 * Creates a non-validating SAXBuilder that uses the named SAX driver.
 *
 * @param saxDriverClass <code>String</code> class name of the SAX driver
 *                       to use for parsing.
 */
public SAXBuilder(String saxDriverClass) {
    // Delegate to the two-argument constructor with validation off.
    this(saxDriverClass, false);
}
187 | |
/**
 * Creates a SAXBuilder that uses the named SAX driver and validates
 * (or not) according to the given flag.
 *
 * @param saxDriverClass <code>String</code> class name of the SAX driver
 *                       to use for parsing.
 * @param validate <code>true</code> if validation should occur,
 *                 <code>false</code> otherwise.
 */
public SAXBuilder(String saxDriverClass, boolean validate) {
    this.validate = validate;
    this.saxDriverClass = saxDriverClass;
}
202 | |
203 /** | |
204 * Returns the driver class assigned in the constructor, or null if none. | |
205 * | |
206 * @return the driver class assigned in the constructor | |
207 */ | |
208 public String getDriverClass() { | |
209 return saxDriverClass; | |
210 } | |
211 | |
212 /** | |
213 * Returns the current {@link org.jdom.JDOMFactory} in use. | |
214 * @return the factory in use | |
215 */ | |
216 public JDOMFactory getFactory() { | |
217 return factory; | |
218 } | |
219 | |
220 /** | |
221 * This sets a custom JDOMFactory for the builder. Use this to build | |
222 * the tree with your own subclasses of the JDOM classes. | |
223 * | |
224 * @param factory <code>JDOMFactory</code> to use | |
225 */ | |
226 public void setFactory(JDOMFactory factory) { | |
227 this.factory = factory; | |
228 } | |
229 | |
230 /** | |
231 * Returns whether validation is to be performed during the build. | |
232 * | |
233 * @return whether validation is to be performed during the build | |
234 */ | |
235 public boolean getValidation() { | |
236 return validate; | |
237 } | |
238 | |
239 /** | |
240 * This sets validation for the builder. | |
241 * | |
242 * @param validate <code>boolean</code> indicating whether validation | |
243 * should occur. | |
244 */ | |
245 public void setValidation(boolean validate) { | |
246 this.validate = validate; | |
247 } | |
248 | |
249 /** | |
250 * Returns the {@link ErrorHandler} assigned, or null if none. | |
251 * @return the ErrorHandler assigned, or null if none | |
252 */ | |
253 public ErrorHandler getErrorHandler() { | |
254 return saxErrorHandler; | |
255 } | |
256 | |
257 /** | |
258 * This sets custom ErrorHandler for the <code>Builder</code>. | |
259 * | |
260 * @param errorHandler <code>ErrorHandler</code> | |
261 */ | |
262 public void setErrorHandler(ErrorHandler errorHandler) { | |
263 saxErrorHandler = errorHandler; | |
264 } | |
265 | |
266 /** | |
267 * Returns the {@link EntityResolver} assigned, or null if none. | |
268 * | |
269 * @return the EntityResolver assigned | |
270 */ | |
271 public EntityResolver getEntityResolver() { | |
272 return saxEntityResolver; | |
273 } | |
274 | |
275 /** | |
276 * This sets custom EntityResolver for the <code>Builder</code>. | |
277 * | |
278 * @param entityResolver <code>EntityResolver</code> | |
279 */ | |
280 public void setEntityResolver(EntityResolver entityResolver) { | |
281 saxEntityResolver = entityResolver; | |
282 } | |
283 | |
284 /** | |
285 * Returns the {@link DTDHandler} assigned, or null if none. | |
286 * | |
287 * @return the DTDHandler assigned | |
288 */ | |
289 public DTDHandler getDTDHandler() { | |
290 return saxDTDHandler; | |
291 } | |
292 | |
293 /** | |
294 * This sets custom DTDHandler for the <code>Builder</code>. | |
295 * | |
296 * @param dtdHandler <code>DTDHandler</code> | |
297 */ | |
298 public void setDTDHandler(DTDHandler dtdHandler) { | |
299 saxDTDHandler = dtdHandler; | |
300 } | |
301 | |
302 /** | |
303 * Returns the {@link XMLFilter} used during parsing, or null if none. | |
304 * | |
305 * @return the XMLFilter used during parsing | |
306 */ | |
307 public XMLFilter getXMLFilter() { | |
308 return saxXMLFilter; | |
309 } | |
310 | |
311 /** | |
312 * This sets a custom {@link org.xml.sax.XMLFilter} for the builder. | |
313 * | |
314 * @param xmlFilter the filter to use | |
315 */ | |
316 public void setXMLFilter(XMLFilter xmlFilter) { | |
317 saxXMLFilter = xmlFilter; | |
318 } | |
319 | |
320 /** | |
321 * Returns whether element content whitespace is to be ignored during the | |
322 * build. | |
323 * | |
324 * @return whether element content whitespace is to be ignored during the | |
325 * build | |
326 */ | |
327 public boolean getIgnoringElementContentWhitespace() { | |
328 return ignoringWhite; | |
329 } | |
330 | |
331 /** | |
332 * Specifies whether or not the parser should elminate whitespace in | |
333 * element content (sometimes known as "ignorable whitespace") when | |
334 * building the document. Only whitespace which is contained within | |
335 * element content that has an element only content model will be | |
336 * eliminated (see XML Rec 3.2.1). For this setting to take effect | |
337 * requires that validation be turned on. The default value of this | |
338 * setting is <code>false</code>. | |
339 * | |
340 * @param ignoringWhite Whether to ignore ignorable whitespace | |
341 */ | |
342 public void setIgnoringElementContentWhitespace(boolean ignoringWhite) { | |
343 this.ignoringWhite = ignoringWhite; | |
344 } | |
345 | |
346 /** | |
347 * Returns whether or not the parser will elminate element content | |
348 * containing only whitespace. | |
349 * | |
350 * @return <code>boolean</code> - whether only whitespace content will | |
351 * be ignored during build. | |
352 * | |
353 * @see #setIgnoringBoundaryWhitespace | |
354 */ | |
355 public boolean getIgnoringBoundaryWhitespace() { | |
356 return ignoringBoundaryWhite; | |
357 } | |
358 | |
359 /** | |
360 * Specifies whether or not the parser should elminate boundary whitespace, | |
361 * a term that indicates whitespace-only text between element tags. This | |
362 * feature is a lot like {@link #setIgnoringElementContentWhitespace(boolean)} | |
363 * but this feature is more aggressive and doesn't require validation be | |
364 * turned on. The {@link #setIgnoringElementContentWhitespace(boolean)} | |
365 * call impacts the SAX parse process while this method impacts the JDOM | |
366 * build process, so it can be beneficial to turn both on for efficiency. | |
367 * For implementation efficiency, this method actually removes all | |
368 * whitespace-only text() nodes. That can, in some cases (like beteween an | |
369 * element tag and a comment), include whitespace that isn't just boundary | |
370 * whitespace. The default is <code>false</code>. | |
371 * | |
372 * @param ignoringBoundaryWhite Whether to ignore whitespace-only text | |
373 * noes | |
374 */ | |
375 public void setIgnoringBoundaryWhitespace(boolean ignoringBoundaryWhite) { | |
376 this.ignoringBoundaryWhite = ignoringBoundaryWhite; | |
377 } | |
378 | |
379 /** | |
380 * Returns whether the contained SAX parser instance is reused across | |
381 * multiple parses. The default is true. | |
382 * | |
383 * @return whether the contained SAX parser instance is reused across | |
384 * multiple parses | |
385 */ | |
386 public boolean getReuseParser() { | |
387 return reuseParser; | |
388 } | |
389 | |
390 /** | |
391 * Specifies whether this builder shall reuse the same SAX parser | |
392 * when performing subsequent parses or allocate a new parser for | |
393 * each parse. The default value of this setting is | |
394 * <code>true</code> (parser reuse). | |
395 * <p> | |
396 * <strong>Note</strong>: As SAX parser instances are not thread safe, | |
397 * the parser reuse feature should not be used with SAXBuilder instances | |
398 * shared among threads.</p> | |
399 * | |
400 * @param reuseParser Whether to reuse the SAX parser. | |
401 */ | |
402 public void setReuseParser(boolean reuseParser) { | |
403 this.reuseParser = reuseParser; | |
404 this.saxParser = null; | |
405 } | |
406 | |
407 /** | |
408 * Specifies whether this builder will do fast reconfiguration of the | |
409 * underlying SAX parser when reuseParser is true. This improves | |
410 * performance in cases where SAXBuilders are reused and lots of small | |
411 * documents are frequently parsed. This avoids attempting to set features | |
412 * on the SAX parser each time build() is called which result in | |
413 * SaxNotRecognizedExceptions. This should ONLY be set for builders where | |
414 * this specific case is an issue. The default value of this setting is | |
415 * <code>false</code> (no fast reconfiguration). If reuseParser is false, | |
416 * calling this has no effect. | |
417 * | |
418 * @param fastReconfigure Whether to do a fast reconfiguration of the parser | |
419 */ | |
420 public void setFastReconfigure(boolean fastReconfigure) { | |
421 if (this.reuseParser) { | |
422 this.fastReconfigure = fastReconfigure; | |
423 } | |
424 } | |
425 | |
426 /** | |
427 * This sets a feature on the SAX parser. See the SAX documentation for . | |
428 * more information. | |
429 * </p> | |
430 * <p> | |
431 * NOTE: SAXBuilder requires that some particular features of the SAX parser be | |
432 * set up in certain ways for it to work properly. The list of such features | |
433 * may change in the future. Therefore, the use of this method may cause | |
434 * parsing to break, and even if it doesn't break anything today it might | |
435 * break parsing in a future JDOM version, because what JDOM parsers require | |
436 * may change over time. Use with caution. | |
437 * </p> | |
438 * | |
439 * @param name The feature name, which is a fully-qualified URI. | |
440 * @param value The requested state of the feature (true or false). | |
441 */ | |
442 public void setFeature(String name, boolean value) { | |
443 // Save the specified feature for later. | |
444 features.put(name, value ? Boolean.TRUE : Boolean.FALSE); | |
445 } | |
446 | |
447 /** | |
448 * This sets a property on the SAX parser. See the SAX documentation for | |
449 * more information. | |
450 * <p> | |
451 * NOTE: SAXBuilder requires that some particular properties of the SAX parser be | |
452 * set up in certain ways for it to work properly. The list of such properties | |
453 * may change in the future. Therefore, the use of this method may cause | |
454 * parsing to break, and even if it doesn't break anything today it might | |
455 * break parsing in a future JDOM version, because what JDOM parsers require | |
456 * may change over time. Use with caution. | |
457 * </p> | |
458 * | |
459 * @param name The property name, which is a fully-qualified URI. | |
460 * @param value The requested value for the property. | |
461 */ | |
462 public void setProperty(String name, Object value) { | |
463 // Save the specified property for later. | |
464 properties.put(name, value); | |
465 } | |
466 | |
467 /** | |
468 * This builds a document from the supplied | |
469 * input source. | |
470 * | |
471 * @param in <code>InputSource</code> to read from | |
472 * @return <code>Document</code> resultant Document object | |
473 * @throws JDOMException when errors occur in parsing | |
474 * @throws IOException when an I/O error prevents a document | |
475 * from being fully parsed | |
476 */ | |
477 public Document build(InputSource in) | |
478 throws JDOMException, IOException { | |
479 SAXHandler contentHandler = null; | |
480 | |
481 try { | |
482 // Create and configure the content handler. | |
483 contentHandler = createContentHandler(); | |
484 configureContentHandler(contentHandler); | |
485 | |
486 XMLReader parser = this.saxParser; | |
487 if (parser == null) { | |
488 // Create and configure the parser. | |
489 parser = createParser(); | |
490 | |
491 // Install optional filter | |
492 if (saxXMLFilter != null) { | |
493 // Connect filter chain to parser | |
494 XMLFilter root = saxXMLFilter; | |
495 while (root.getParent() instanceof XMLFilter) { | |
496 root = (XMLFilter)root.getParent(); | |
497 } | |
498 root.setParent(parser); | |
499 | |
500 // Read from filter | |
501 parser = saxXMLFilter; | |
502 } | |
503 | |
504 // Configure parser | |
505 configureParser(parser, contentHandler); | |
506 | |
507 if (reuseParser) { | |
508 this.saxParser = parser; | |
509 } | |
510 } | |
511 else { | |
512 // Reset content handler as SAXHandler instances cannot | |
513 // be reused | |
514 configureParser(parser, contentHandler); | |
515 } | |
516 | |
517 // Parse the document. | |
518 parser.parse(in); | |
519 | |
520 return contentHandler.getDocument(); | |
521 } | |
522 catch (SAXParseException e) { | |
523 Document doc = contentHandler.getDocument(); | |
524 if (doc.hasRootElement() == false) { | |
525 doc = null; | |
526 } | |
527 | |
528 String systemId = e.getSystemId(); | |
529 if (systemId != null) { | |
530 throw new JDOMParseException("Error on line " + | |
531 e.getLineNumber() + " of document " + systemId, e, doc); | |
532 } else { | |
533 throw new JDOMParseException("Error on line " + | |
534 e.getLineNumber(), e, doc); | |
535 } | |
536 } | |
537 catch (SAXException e) { | |
538 throw new JDOMParseException("Error in building: " + | |
539 e.getMessage(), e, contentHandler.getDocument()); | |
540 } | |
541 finally { | |
542 // Explicitly nullify the handler to encourage GC | |
543 // It's a stack var so this shouldn't be necessary, but it | |
544 // seems to help on some JVMs | |
545 contentHandler = null; | |
546 } | |
547 } | |
548 | |
549 /** | |
550 * This creates the SAXHandler that will be used to build the Document. | |
551 * | |
552 * @return <code>SAXHandler</code> - resultant SAXHandler object. | |
553 */ | |
554 protected SAXHandler createContentHandler() { | |
555 SAXHandler contentHandler = new SAXHandler(factory); | |
556 return contentHandler; | |
557 } | |
558 | |
559 /** | |
560 * This configures the SAXHandler that will be used to build the Document. | |
561 * <p> | |
562 * The default implementation simply passes through some configuration | |
563 * settings that were set on the SAXBuilder: setExpandEntities() and | |
564 * setIgnoringElementContentWhitespace(). | |
565 * </p> | |
566 * @param contentHandler The SAXHandler to configure | |
567 */ | |
568 protected void configureContentHandler(SAXHandler contentHandler) { | |
569 // Setup pass through behavior | |
570 contentHandler.setExpandEntities(expand); | |
571 contentHandler.setIgnoringElementContentWhitespace(ignoringWhite); | |
572 contentHandler.setIgnoringBoundaryWhitespace(ignoringBoundaryWhite); | |
573 } | |
574 | |
575 /** | |
576 * This creates the XMLReader to be used for reading the XML document. | |
577 * <p> | |
578 * The default behavior is to (1) use the saxDriverClass, if it has been | |
579 * set, (2) try to obtain a parser from JAXP, if it is available, and | |
580 * (3) if all else fails, use a hard-coded default parser (currently | |
581 * the Xerces parser). Subclasses may override this method to determine | |
582 * the parser to use in a different way. | |
583 * </p> | |
584 * | |
585 * @return <code>XMLReader</code> - resultant XMLReader object. | |
586 * @throws org.jdom.JDOMException | |
587 */ | |
588 protected XMLReader createParser() throws JDOMException { | |
589 XMLReader parser = null; | |
590 if (saxDriverClass != null) { | |
591 // The user knows that they want to use a particular class | |
592 try { | |
593 parser = XMLReaderFactory.createXMLReader(saxDriverClass); | |
594 | |
595 // Configure parser | |
596 setFeaturesAndProperties(parser, true); | |
597 } | |
598 catch (SAXException e) { | |
599 throw new JDOMException("Could not load " + saxDriverClass, e); | |
600 } | |
601 } else { | |
602 // Try using JAXP... | |
603 // Note we need JAXP 1.1, and if JAXP 1.0 is all that's | |
604 // available then the getXMLReader call fails and we skip | |
605 // to the hard coded default parser | |
606 try { | |
607 // Get factory class and method. | |
608 Class factoryClass = | |
609 Class.forName("org.jdom.input.JAXPParserFactory"); | |
610 | |
611 Method createParser = | |
612 factoryClass.getMethod("createParser", | |
613 new Class[] { boolean.class, Map.class, Map.class }); | |
614 | |
615 // Create SAX parser. | |
616 parser = (XMLReader)createParser.invoke(null, | |
617 new Object[] { validate ? Boolean.TRUE : Boolean.FALSE, | |
618 features, properties }); | |
619 | |
620 // Configure parser | |
621 setFeaturesAndProperties(parser, false); | |
622 } | |
623 catch (JDOMException e) { | |
624 throw e; | |
625 } | |
626 catch (NoClassDefFoundError e) { | |
627 // The class loader failed to resolve the dependencies | |
628 // of org.jdom.input.JAXPParserFactory. This probably means | |
629 // that no JAXP parser is present in its class path. | |
630 // => Ignore and try allocating default SAX parser instance. | |
631 } | |
632 catch (Exception e) { | |
633 // Ignore and try allocating default SAX parser instance. | |
634 } | |
635 } | |
636 | |
637 // Check to see if we got a parser yet, if not, try to use a | |
638 // hard coded default | |
639 if (parser == null) { | |
640 try { | |
641 parser = XMLReaderFactory.createXMLReader(DEFAULT_SAX_DRIVER); | |
642 // System.out.println("using default " + DEFAULT_SAX_DRIVER); | |
643 saxDriverClass = parser.getClass().getName(); | |
644 | |
645 // Configure parser | |
646 setFeaturesAndProperties(parser, true); | |
647 } | |
648 catch (SAXException e) { | |
649 throw new JDOMException("Could not load default SAX parser: " | |
650 + DEFAULT_SAX_DRIVER, e); | |
651 } | |
652 } | |
653 | |
654 return parser; | |
655 } | |
656 | |
657 /** | |
658 * This configures the XMLReader to be used for reading the XML document. | |
659 * <p> | |
660 * The default implementation sets various options on the given XMLReader, | |
661 * such as validation, DTD resolution, entity handlers, etc., according | |
662 * to the options that were set (e.g. via <code>setEntityResolver</code>) | |
663 * and set various SAX properties and features that are required for JDOM | |
664 * internals. These features may change in future releases, so change this | |
665 * behavior at your own risk. | |
666 * </p> | |
667 * @param parser | |
668 * @param contentHandler | |
669 * @throws org.jdom.JDOMException | |
670 */ | |
671 protected void configureParser(XMLReader parser, SAXHandler contentHandler) | |
672 throws JDOMException { | |
673 | |
674 // Setup SAX handlers. | |
675 | |
676 parser.setContentHandler(contentHandler); | |
677 | |
678 if (saxEntityResolver != null) { | |
679 parser.setEntityResolver(saxEntityResolver); | |
680 } | |
681 | |
682 if (saxDTDHandler != null) { | |
683 parser.setDTDHandler(saxDTDHandler); | |
684 } else { | |
685 parser.setDTDHandler(contentHandler); | |
686 } | |
687 | |
688 if (saxErrorHandler != null) { | |
689 parser.setErrorHandler(saxErrorHandler); | |
690 } else { | |
691 parser.setErrorHandler(new BuilderErrorHandler()); | |
692 } | |
693 | |
694 // If fastReconfigure is enabled and we failed in the previous attempt | |
695 // in configuring lexical reporting, then we skip this step. This | |
696 // saves the work of repeated exception handling on each parse. | |
697 if (!skipNextLexicalReportingConfig) { | |
698 boolean success = false; | |
699 | |
700 try { | |
701 parser.setProperty("http://xml.org/sax/handlers/LexicalHandler", | |
702 contentHandler); | |
703 success = true; | |
704 } catch (SAXNotSupportedException e) { | |
705 // No lexical reporting available | |
706 } catch (SAXNotRecognizedException e) { | |
707 // No lexical reporting available | |
708 } | |
709 | |
710 // Some parsers use alternate property for lexical handling (grr...) | |
711 if (!success) { | |
712 try { | |
713 parser.setProperty("http://xml.org/sax/properties/lexical-handler", | |
714 contentHandler); | |
715 success = true; | |
716 } catch (SAXNotSupportedException e) { | |
717 // No lexical reporting available | |
718 } catch (SAXNotRecognizedException e) { | |
719 // No lexical reporting available | |
720 } | |
721 } | |
722 | |
723 // If unable to configure this property and fastReconfigure is | |
724 // enabled, then setup to avoid this code path entirely next time. | |
725 if (!success && fastReconfigure) { | |
726 skipNextLexicalReportingConfig = true; | |
727 } | |
728 } | |
729 | |
730 // If fastReconfigure is enabled and we failed in the previous attempt | |
731 // in configuring entity expansion, then skip this step. This | |
732 // saves the work of repeated exception handling on each parse. | |
733 if (!skipNextEntityExpandConfig) { | |
734 boolean success = false; | |
735 | |
736 // Try setting the DeclHandler if entity expansion is off | |
737 if (!expand) { | |
738 try { | |
739 parser.setProperty("http://xml.org/sax/properties/declaration-handler", | |
740 contentHandler); | |
741 success = true; | |
742 } catch (SAXNotSupportedException e) { | |
743 // No lexical reporting available | |
744 } catch (SAXNotRecognizedException e) { | |
745 // No lexical reporting available | |
746 } | |
747 } | |
748 | |
749 /* If unable to configure this property and fastReconfigure is | |
750 * enabled, then setup to avoid this code path entirely next time. | |
751 */ | |
752 if (!success && fastReconfigure) { | |
753 skipNextEntityExpandConfig = true; | |
754 } | |
755 } | |
756 } | |
757 | |
758 private void setFeaturesAndProperties(XMLReader parser, | |
759 boolean coreFeatures) | |
760 throws JDOMException { | |
761 // Set any user-specified features on the parser. | |
762 Iterator iter = features.keySet().iterator(); | |
763 while (iter.hasNext()) { | |
764 String name = (String)iter.next(); | |
765 Boolean value = (Boolean)features.get(name); | |
766 internalSetFeature(parser, name, value.booleanValue(), name); | |
767 } | |
768 | |
769 // Set any user-specified properties on the parser. | |
770 iter = properties.keySet().iterator(); | |
771 while (iter.hasNext()) { | |
772 String name = (String)iter.next(); | |
773 internalSetProperty(parser, name, properties.get(name), name); | |
774 } | |
775 | |
776 if (coreFeatures) { | |
777 // Set validation. | |
778 try { | |
779 internalSetFeature(parser, | |
780 "http://xml.org/sax/features/validation", | |
781 validate, "Validation"); | |
782 } catch (JDOMException e) { | |
783 // If validation is not supported, and the user is requesting | |
784 // that we don't validate, that's fine - don't throw an | |
785 // exception. | |
786 if (validate) | |
787 throw e; | |
788 } | |
789 | |
790 // Setup some namespace features. | |
791 internalSetFeature(parser, | |
792 "http://xml.org/sax/features/namespaces", | |
793 true, "Namespaces"); | |
794 internalSetFeature(parser, | |
795 "http://xml.org/sax/features/namespace-prefixes", | |
796 true, "Namespace prefixes"); | |
797 } | |
798 | |
799 // Set entity expansion | |
800 // Note SAXHandler can work regardless of how this is set, but when | |
801 // entity expansion it's worth it to try to tell the parser not to | |
802 // even bother with external general entities. | |
803 // Apparently no parsers yet support this feature. | |
804 // XXX It might make sense to setEntityResolver() with a resolver | |
805 // that simply ignores external general entities | |
806 try { | |
807 if (parser.getFeature("http://xml.org/sax/features/external-general-entities") != expand) { | |
808 parser.setFeature("http://xml.org/sax/features/external-general-entities", expand); | |
809 } | |
810 } | |
811 catch (SAXNotRecognizedException e) { /* Ignore... */ } | |
812 catch (SAXNotSupportedException e) { /* Ignore... */ } | |
813 } | |
814 | |
815 /** | |
816 * Tries to set a feature on the parser. If the feature cannot be set, | |
817 * throws a JDOMException describing the problem. | |
818 */ | |
819 private void internalSetFeature(XMLReader parser, String feature, | |
820 boolean value, String displayName) throws JDOMException { | |
821 try { | |
822 parser.setFeature(feature, value); | |
823 } catch (SAXNotSupportedException e) { | |
824 throw new JDOMException( | |
825 displayName + " feature not supported for SAX driver " + parser.getClass().getName()); | |
826 } catch (SAXNotRecognizedException e) { | |
827 throw new JDOMException( | |
828 displayName + " feature not recognized for SAX driver " + parser.getClass().getName()); | |
829 } | |
830 } | |
831 | |
832 /** | |
833 * <p> | |
834 * Tries to set a property on the parser. If the property cannot be set, | |
835 * throws a JDOMException describing the problem. | |
836 * </p> | |
837 */ | |
838 private void internalSetProperty(XMLReader parser, String property, | |
839 Object value, String displayName) throws JDOMException { | |
840 try { | |
841 parser.setProperty(property, value); | |
842 } catch (SAXNotSupportedException e) { | |
843 throw new JDOMException( | |
844 displayName + " property not supported for SAX driver " + parser.getClass().getName()); | |
845 } catch (SAXNotRecognizedException e) { | |
846 throw new JDOMException( | |
847 displayName + " property not recognized for SAX driver " + parser.getClass().getName()); | |
848 } | |
849 } | |
850 | |
851 /** | |
852 * <p> | |
853 * This builds a document from the supplied | |
854 * input stream. | |
855 * </p> | |
856 * | |
857 * @param in <code>InputStream</code> to read from | |
858 * @return <code>Document</code> resultant Document object | |
859 * @throws JDOMException when errors occur in parsing | |
860 * @throws IOException when an I/O error prevents a document | |
861 * from being fully parsed. | |
862 */ | |
863 public Document build(InputStream in) | |
864 throws JDOMException, IOException { | |
865 return build(new InputSource(in)); | |
866 } | |
867 | |
868 /** | |
869 * <p> | |
870 * This builds a document from the supplied | |
871 * filename. | |
872 * </p> | |
873 * | |
874 * @param file <code>File</code> to read from | |
875 * @return <code>Document</code> resultant Document object | |
876 * @throws JDOMException when errors occur in parsing | |
877 * @throws IOException when an I/O error prevents a document | |
878 * from being fully parsed | |
879 */ | |
880 public Document build(File file) | |
881 throws JDOMException, IOException { | |
882 try { | |
883 URL url = fileToURL(file); | |
884 return build(url); | |
885 } catch (MalformedURLException e) { | |
886 throw new JDOMException("Error in building", e); | |
887 } | |
888 } | |
889 | |
890 /** | |
891 * <p> | |
892 * This builds a document from the supplied | |
893 * URL. | |
894 * </p> | |
895 * | |
896 * @param url <code>URL</code> to read from. | |
897 * @return <code>Document</code> - resultant Document object. | |
898 * @throws JDOMException when errors occur in parsing | |
899 * @throws IOException when an I/O error prevents a document | |
900 * from being fully parsed. | |
901 */ | |
902 public Document build(URL url) | |
903 throws JDOMException, IOException { | |
904 String systemID = url.toExternalForm(); | |
905 return build(new InputSource(systemID)); | |
906 } | |
907 | |
908 /** | |
909 * <p> | |
910 * This builds a document from the supplied | |
911 * input stream. | |
912 * </p> | |
913 * | |
914 * @param in <code>InputStream</code> to read from. | |
915 * @param systemId base for resolving relative URIs | |
916 * @return <code>Document</code> resultant Document object | |
917 * @throws JDOMException when errors occur in parsing | |
918 * @throws IOException when an I/O error prevents a document | |
919 * from being fully parsed | |
920 */ | |
921 public Document build(InputStream in, String systemId) | |
922 throws JDOMException, IOException { | |
923 | |
924 InputSource src = new InputSource(in); | |
925 src.setSystemId(systemId); | |
926 return build(src); | |
927 } | |
928 | |
929 /** | |
930 * <p> | |
931 * This builds a document from the supplied | |
932 * Reader. It's the programmer's responsibility to make sure | |
933 * the reader matches the encoding of the file. It's often easier | |
934 * and safer to use an InputStream rather than a Reader, and to let the | |
935 * parser auto-detect the encoding from the XML declaration. | |
936 * </p> | |
937 * | |
938 * @param characterStream <code>Reader</code> to read from | |
939 * @return <code>Document</code> resultant Document object | |
940 * @throws JDOMException when errors occur in parsing | |
941 * @throws IOException when an I/O error prevents a document | |
942 * from being fully parsed | |
943 */ | |
944 public Document build(Reader characterStream) | |
945 throws JDOMException, IOException { | |
946 return build(new InputSource(characterStream)); | |
947 } | |
948 | |
949 /** | |
950 * <p> | |
951 * This builds a document from the supplied | |
952 * Reader. It's the programmer's responsibility to make sure | |
953 * the reader matches the encoding of the file. It's often easier | |
954 * and safer to use an InputStream rather than a Reader, and to let the | |
955 * parser auto-detect the encoding from the XML declaration. | |
956 * </p> | |
957 * | |
958 * @param characterStream <code>Reader</code> to read from. | |
959 * @param systemId base for resolving relative URIs | |
960 * @return <code>Document</code> resultant Document object | |
961 * @throws JDOMException when errors occur in parsing | |
962 * @throws IOException when an I/O error prevents a document | |
963 * from being fully parsed | |
964 */ | |
965 public Document build(Reader characterStream, String systemId) | |
966 throws JDOMException, IOException { | |
967 | |
968 InputSource src = new InputSource(characterStream); | |
969 src.setSystemId(systemId); | |
970 return build(src); | |
971 } | |
972 | |
973 /** | |
974 * <p> | |
975 * This builds a document from the supplied | |
976 * URI. | |
977 * </p> | |
978 * @param systemId URI for the input | |
979 * @return <code>Document</code> resultant Document object | |
980 * @throws JDOMException when errors occur in parsing | |
981 * @throws IOException when an I/O error prevents a document | |
982 * from being fully parsed | |
983 */ | |
984 public Document build(String systemId) | |
985 throws JDOMException, IOException { | |
986 return build(new InputSource(systemId)); | |
987 } | |
988 | |
989 // /** | |
990 // * Imitation of File.toURL(), a JDK 1.2 method, reimplemented | |
991 // * here to work with JDK 1.1. | |
992 // * | |
993 // * @see java.io.File | |
994 // * | |
995 // * @param f the file to convert | |
996 // * @return the file path converted to a file: URL | |
997 // */ | |
998 // protected URL fileToURL(File f) throws MalformedURLException { | |
999 // String path = f.getAbsolutePath(); | |
1000 // if (File.separatorChar != '/') { | |
1001 // path = path.replace(File.separatorChar, '/'); | |
1002 // } | |
1003 // if (!path.startsWith("/")) { | |
1004 // path = "/" + path; | |
1005 // } | |
1006 // if (!path.endsWith("/") && f.isDirectory()) { | |
1007 // path = path + "/"; | |
1008 // } | |
1009 // return new URL("file", "", path); | |
1010 // } | |
1011 | |
1012 /** Custom File.toUrl() implementation to handle special chars in file names | |
1013 * | |
1014 * @param file file object whose path will be converted | |
1015 * @return URL form of the file, with special characters handled | |
1016 * @throws MalformedURLException if there's a problem constructing a URL | |
1017 */ | |
1018 private static URL fileToURL(File file) throws MalformedURLException { | |
1019 StringBuffer buffer = new StringBuffer(); | |
1020 String path = file.getAbsolutePath(); | |
1021 | |
1022 // Convert non-URL style file separators | |
1023 if (File.separatorChar != '/') { | |
1024 path = path.replace(File.separatorChar, '/'); | |
1025 } | |
1026 | |
1027 // Make sure it starts at root | |
1028 if (!path.startsWith("/")) { | |
1029 buffer.append('/'); | |
1030 } | |
1031 | |
1032 // Copy, converting URL special characters as we go | |
1033 int len = path.length(); | |
1034 for (int i = 0; i < len; i++) { | |
1035 char c = path.charAt(i); | |
1036 if (c == ' ') | |
1037 buffer.append("%20"); | |
1038 else if (c == '#') | |
1039 buffer.append("%23"); | |
1040 else if (c == '%') | |
1041 buffer.append("%25"); | |
1042 else if (c == '&') | |
1043 buffer.append("%26"); | |
1044 else if (c == ';') | |
1045 buffer.append("%3B"); | |
1046 else if (c == '<') | |
1047 buffer.append("%3C"); | |
1048 else if (c == '=') | |
1049 buffer.append("%3D"); | |
1050 else if (c == '>') | |
1051 buffer.append("%3E"); | |
1052 else if (c == '?') | |
1053 buffer.append("%3F"); | |
1054 else if (c == '~') | |
1055 buffer.append("%7E"); | |
1056 else | |
1057 buffer.append(c); | |
1058 } | |
1059 | |
1060 // Make sure directories end with slash | |
1061 if (!path.endsWith("/") && file.isDirectory()) { | |
1062 buffer.append('/'); | |
1063 } | |
1064 | |
1065 // Return URL | |
1066 return new URL("file", "", buffer.toString()); | |
1067 } | |
1068 | |
1069 /** | |
1070 * Returns whether or not entities are being expanded into normal text | |
1071 * content. | |
1072 * | |
1073 * @return whether entities are being expanded | |
1074 */ | |
1075 public boolean getExpandEntities() { | |
1076 return expand; | |
1077 } | |
1078 | |
1079 /** | |
1080 * <p> | |
1081 * This sets whether or not to expand entities for the builder. | |
1082 * A true means to expand entities as normal content. A false means to | |
1083 * leave entities unexpanded as <code>EntityRef</code> objects. The | |
1084 * default is true. | |
1085 * </p> | |
1086 * <p> | |
1087 * When this setting is false, the internal DTD subset is retained; when | |
1088 * this setting is true, the internal DTD subset is not retained. | |
1089 * </p> | |
1090 * <p> | |
1091 * Note that Xerces (at least up to 1.4.4) has a bug where entities | |
1092 * in attribute values will be misreported if this flag is turned off, | |
1093 * resulting in entities to appear within element content. When turning | |
1094 * entity expansion off either avoid entities in attribute values, or | |
1095 * use another parser like Crimson. | |
1096 * http://nagoya.apache.org/bugzilla/show_bug.cgi?id=6111 | |
1097 * </p> | |
1098 * | |
1099 * @param expand <code>boolean</code> indicating whether entity expansion | |
1100 * should occur. | |
1101 */ | |
1102 public void setExpandEntities(boolean expand) { | |
1103 this.expand = expand; | |
1104 } | |
1105 } |