comparison NGSrich_0.5.5/src/org/jdom/Verifier.java @ 0:89ad0a9cca52 default tip

Uploaded
author pfrommolt
date Mon, 21 Nov 2011 08:12:19 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:89ad0a9cca52
1 /*--
2
3 $Id: Verifier.java,v 1.57 2009/07/23 05:54:23 jhunter Exp $
4
5 Copyright (C) 2000-2007 Jason Hunter & Brett McLaughlin.
6 All rights reserved.
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions
10 are met:
11
12 1. Redistributions of source code must retain the above copyright
13 notice, this list of conditions, and the following disclaimer.
14
15 2. Redistributions in binary form must reproduce the above copyright
16 notice, this list of conditions, and the disclaimer that follows
17 these conditions in the documentation and/or other materials
18 provided with the distribution.
19
20 3. The name "JDOM" must not be used to endorse or promote products
21 derived from this software without prior written permission. For
22 written permission, please contact <request_AT_jdom_DOT_org>.
23
24 4. Products derived from this software may not be called "JDOM", nor
25 may "JDOM" appear in their name, without prior written permission
26 from the JDOM Project Management <request_AT_jdom_DOT_org>.
27
28 In addition, we request (but do not require) that you include in the
29 end-user documentation provided with the redistribution and/or in the
30 software itself an acknowledgement equivalent to the following:
31 "This product includes software developed by the
32 JDOM Project (http://www.jdom.org/)."
33 Alternatively, the acknowledgment may be graphical using the logos
34 available at http://www.jdom.org/images/logos.
35
36 THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39 DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
40 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47 SUCH DAMAGE.
48
49 This software consists of voluntary contributions made by many
50 individuals on behalf of the JDOM Project and was originally
51 created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
52 Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information
53 on the JDOM Project, please see <http://www.jdom.org/>.
54
55 */
56
57 package org.jdom;
58
59 import java.util.*;
60
61 /**
62 * A utility class to handle well-formedness checks on names, data, and other
63 * verification tasks for JDOM. The class is final and may not be subclassed.
64 *
65 * @version $Revision: 1.57 $, $Date: 2009/07/23 05:54:23 $
66 * @author Brett McLaughlin
67 * @author Elliotte Rusty Harold
68 * @author Jason Hunter
69 * @author Bradley S. Huffman
70 */
71 final public class Verifier {
72
// Revision-control identification string for this source file. The "$...$"
// fields look like expanded RCS/CVS keywords; nothing visible in this file
// reads the constant at runtime.
73 private static final String CVS_ID =
74 "@(#) $RCSfile: Verifier.java,v $ $Revision: 1.57 $ $Date: 2009/07/23 05:54:23 $ $Name: jdom_1_1_1 $";
75
76 /**
77 * Ensure instantiation cannot occur.
78 */
private Verifier() {
    // Intentionally empty: the private constructor only exists to
    // prevent instantiation of this utility class.
}
80
81 /**
82 * This will check the supplied name to see if it is legal for use as
83 * a JDOM <code>{@link Element}</code> name.
84 *
85 * @param name <code>String</code> name to check.
86 * @return <code>String</code> reason name is illegal, or
87 * <code>null</code> if name is OK.
88 */
89 public static String checkElementName(String name) {
90 // Check basic XML name rules first
91 String reason;
92 if ((reason = checkXMLName(name)) != null) {
93 return reason;
94 }
95
96 // No colons allowed, since elements handle this internally
97 if (name.indexOf(":") != -1) {
98 return "Element names cannot contain colons";
99 }
100
101 // If we got here, everything is OK
102 return null;
103 }
104
105 /**
106 * This will check the supplied name to see if it is legal for use as
107 * a JDOM <code>{@link Attribute}</code> name.
108 *
109 * @param name <code>String</code> name to check.
110 * @return <code>String</code> reason name is illegal, or
111 * <code>null</code> if name is OK.
112 */
113 public static String checkAttributeName(String name) {
114 // Check basic XML name rules first
115 String reason;
116 if ((reason = checkXMLName(name)) != null) {
117 return reason;
118 }
119
120 // No colons are allowed, since attributes handle this internally
121 if (name.indexOf(":") != -1) {
122 return "Attribute names cannot contain colons";
123 }
124
125 // Attribute names may not be xmlns since we do this internally too
126 if (name.equals("xmlns")) {
127 return "An Attribute name may not be \"xmlns\"; " +
128 "use the Namespace class to manage namespaces";
129 }
130
131 // If we got here, everything is OK
132 return null;
133 }
134
135 /**
136 * This will check the supplied string to see if it only contains
137 * characters allowed by the XML 1.0 specification. The C0 controls
138 * (e.g. null, vertical tab, formfeed, etc.) are specifically excluded
139 * except for carriage return, linefeed, and the horizontal tab.
140 * Surrogates are also excluded.
141 * <p>
142 * This method is useful for checking element content and attribute
143 * values. Note that characters
144 * like " and &lt; are allowed in attribute values and element content.
145 * They will simply be escaped as &quot; or &lt;
146 * when the value is serialized.
147 * </p>
148 *
149 * @param text <code>String</code> value to check.
150 * @return <code>String</code> reason text is illegal, or
151 * <code>null</code> if text is OK.
152 */
153 public static String checkCharacterData(String text) {
154 if (text == null) {
155 return "A null is not a legal XML value";
156 }
157
158 // Do check
159 for (int i = 0, len = text.length(); i<len; i++) {
160
161 int ch = text.charAt(i);
162
163 // Check if high part of a surrogate pair
164 if (isHighSurrogate((char) ch)) {
165 // Check if next char is the low-surrogate
166 i++;
167 if (i < len) {
168 char low = text.charAt(i);
169 if (!isLowSurrogate(low)) {
170 return "Illegal Surrogate Pair";
171 }
172 // It's a good pair, calculate the true value of
173 // the character to then fall thru to isXMLCharacter
174 ch = decodeSurrogatePair((char) ch, low);
175 }
176 else {
177 return "Surrogate Pair Truncated";
178 }
179 }
180
181 if (!isXMLCharacter(ch)) {
182 // Likely this character can't be easily displayed
183 // because it's a control so we use it'd hexadecimal
184 // representation in the reason.
185 return ("0x" + Integer.toHexString(ch) +
186 " is not a legal XML character");
187 }
188 }
189
190 // If we got here, everything is OK
191 return null;
192 }
193
194 /**
195 * This will check the supplied data to see if it is legal for use as
196 * JDOM <code>{@link CDATA}</code>.
197 *
198 * @param data <code>String</code> data to check.
199 * @return <code>String</code> reason data is illegal, or
200 * <code>null</code> if data is OK.
201 */
202 public static String checkCDATASection(String data) {
203 String reason = null;
204 if ((reason = checkCharacterData(data)) != null) {
205 return reason;
206 }
207
208 if (data.indexOf("]]>") != -1) {
209 return "CDATA cannot internally contain a CDATA ending " +
210 "delimiter (]]>)";
211 }
212
213 // If we got here, everything is OK
214 return null;
215 }
216
217 /**
218 * This will check the supplied name to see if it is legal for use as
219 * a JDOM <code>{@link Namespace}</code> prefix.
220 *
221 * @param prefix <code>String</code> prefix to check.
222 * @return <code>String</code> reason name is illegal, or
223 * <code>null</code> if name is OK.
224 */
225 public static String checkNamespacePrefix(String prefix) {
226 // Manually do rules, since URIs can be null or empty
227 if ((prefix == null) || (prefix.equals(""))) {
228 return null;
229 }
230
231 // Cannot start with a number
232 char first = prefix.charAt(0);
233 if (isXMLDigit(first)) {
234 return "Namespace prefixes cannot begin with a number";
235 }
236 // Cannot start with a $
237 if (first == '$') {
238 return "Namespace prefixes cannot begin with a dollar sign ($)";
239 }
240 // Cannot start with a -
241 if (first == '-') {
242 return "Namespace prefixes cannot begin with a hyphen (-)";
243 }
244 // Cannot start with a .
245 if (first == '.') {
246 return "Namespace prefixes cannot begin with a period (.)";
247 }
248 // Cannot start with "xml" in any character case
249 if (prefix.toLowerCase().startsWith("xml")) {
250 return "Namespace prefixes cannot begin with " +
251 "\"xml\" in any combination of case";
252 }
253
254 // Ensure legal content
255 for (int i=0, len = prefix.length(); i<len; i++) {
256 char c = prefix.charAt(i);
257 if (!isXMLNameCharacter(c)) {
258 return "Namespace prefixes cannot contain the character \"" +
259 c + "\"";
260 }
261 }
262
263 // No colons allowed
264 if (prefix.indexOf(":") != -1) {
265 return "Namespace prefixes cannot contain colons";
266 }
267
268 // If we got here, everything is OK
269 return null;
270 }
271
272 /**
273 * This will check the supplied name to see if it is legal for use as
274 * a JDOM <code>{@link Namespace}</code> URI.
275 *
276 * @param uri <code>String</code> URI to check.
277 * @return <code>String</code> reason name is illegal, or
278 * <code>null</code> if name is OK.
279 */
280 public static String checkNamespaceURI(String uri) {
281 // Manually do rules, since URIs can be null or empty
282 if ((uri == null) || (uri.equals(""))) {
283 return null;
284 }
285
286 // Cannot start with a number
287 char first = uri.charAt(0);
288 if (Character.isDigit(first)) {
289 return "Namespace URIs cannot begin with a number";
290 }
291 // Cannot start with a $
292 if (first == '$') {
293 return "Namespace URIs cannot begin with a dollar sign ($)";
294 }
295 // Cannot start with a -
296 if (first == '-') {
297 return "Namespace URIs cannot begin with a hyphen (-)";
298 }
299
300 // If we got here, everything is OK
301 return null;
302 }
303
304 /**
305 * Check if two namespaces collide.
306 *
307 * @param namespace <code>Namespace</code> to check.
308 * @param other <code>Namespace</code> to check against.
309 * @return <code>String</code> reason for collision, or
310 * <code>null</code> if no collision.
311 */
312 public static String checkNamespaceCollision(Namespace namespace,
313 Namespace other) {
314 String p1,p2,u1,u2,reason;
315
316 reason = null;
317 p1 = namespace.getPrefix();
318 u1 = namespace.getURI();
319 p2 = other.getPrefix();
320 u2 = other.getURI();
321 if (p1.equals(p2) && !u1.equals(u2)) {
322 reason = "The namespace prefix \"" + p1 + "\" collides";
323 }
324 return reason;
325 }
326
327 /**
328 * Check if <code>{@link Attribute}</code>'s namespace collides with a
329 * <code>{@link Element}</code>'s namespace.
330 *
331 * @param attribute <code>Attribute</code> to check.
332 * @param element <code>Element</code> to check against.
333 * @return <code>String</code> reason for collision, or
334 * <code>null</code> if no collision.
335 */
336 public static String checkNamespaceCollision(Attribute attribute,
337 Element element) {
338 Namespace namespace = attribute.getNamespace();
339 String prefix = namespace.getPrefix();
340 if ("".equals(prefix)) {
341 return null;
342 }
343
344 return checkNamespaceCollision(namespace, element);
345 }
346
347 /**
348 * Check if a <code>{@link Namespace}</code> collides with a
349 * <code>{@link Element}</code>'s namespace.
350 *
351 * @param namespace <code>Namespace</code> to check.
352 * @param element <code>Element</code> to check against.
353 * @return <code>String</code> reason for collision, or
354 * <code>null</code> if no collision.
355 */
356 public static String checkNamespaceCollision(Namespace namespace,
357 Element element) {
358 String reason = checkNamespaceCollision(namespace,
359 element.getNamespace());
360 if (reason != null) {
361 return reason + " with the element namespace prefix";
362 }
363
364 reason = checkNamespaceCollision(namespace,
365 element.getAdditionalNamespaces());
366 if (reason != null) {
367 return reason;
368 }
369
370 reason = checkNamespaceCollision(namespace, element.getAttributes());
371 if (reason != null) {
372 return reason;
373 }
374
375 return null;
376 }
377
378 /**
379 * Check if a <code>{@link Namespace}</code> collides with a
380 * <code>{@link Attribute}</code>'s namespace.
381 *
382 * @param namespace <code>Namespace</code> to check.
383 * @param attribute <code>Attribute</code> to check against.
384 * @return <code>String</code> reason for collision, or
385 * <code>null</code> if no collision.
386 */
387 public static String checkNamespaceCollision(Namespace namespace,
388 Attribute attribute) {
389 String reason = null;
390 if (!attribute.getNamespace().equals(Namespace.NO_NAMESPACE)) {
391 reason = checkNamespaceCollision(namespace,
392 attribute.getNamespace());
393 if (reason != null) {
394 reason += " with an attribute namespace prefix on the element";
395 }
396 }
397 return reason;
398 }
399
400 /**
401 * Check if a <code>{@link Namespace}</code> collides with any namespace
402 * from a list of objects.
403 *
404 * @param namespace <code>Namespace</code> to check.
405 * @param list <code>List</code> to check against.
406 * @return <code>String</code> reason for collision, or
407 * <code>null</code> if no collision.
408 */
409 public static String checkNamespaceCollision(Namespace namespace,
410 List list) {
411 if (list == null) {
412 return null;
413 }
414
415 String reason = null;
416 Iterator i = list.iterator();
417 while ((reason == null) && i.hasNext()) {
418 Object obj = i.next();
419 if (obj instanceof Attribute) {
420 reason = checkNamespaceCollision(namespace, (Attribute) obj);
421 }
422 else if (obj instanceof Element) {
423 reason = checkNamespaceCollision(namespace, (Element) obj);
424 }
425 else if (obj instanceof Namespace) {
426 reason = checkNamespaceCollision(namespace, (Namespace) obj);
427 if (reason != null) {
428 reason += " with an additional namespace declared" +
429 " by the element";
430 }
431 }
432 }
433 return reason;
434 }
435
436 /**
437 * This will check the supplied data to see if it is legal for use as
438 * a JDOM <code>{@link ProcessingInstruction}</code> target.
439 *
440 * @param target <code>String</code> target to check.
441 * @return <code>String</code> reason target is illegal, or
442 * <code>null</code> if target is OK.
443 */
444 public static String checkProcessingInstructionTarget(String target) {
445 // Check basic XML name rules first
446 String reason;
447 if ((reason = checkXMLName(target)) != null) {
448 return reason;
449 }
450
451 // No colons allowed, per Namespace Specification Section 6
452 if (target.indexOf(":") != -1) {
453 return "Processing instruction targets cannot contain colons";
454 }
455
456 // Cannot begin with 'xml' in any case
457 if (target.equalsIgnoreCase("xml")) {
458 return "Processing instructions cannot have a target of " +
459 "\"xml\" in any combination of case. (Note that the " +
460 "\"<?xml ... ?>\" declaration at the beginning of a " +
461 "document is not a processing instruction and should not " +
462 "be added as one; it is written automatically during " +
463 "output, e.g. by XMLOutputter.)";
464 }
465
466 // If we got here, everything is OK
467 return null;
468 }
469
470 /**
471 * This will check the supplied data to see if it is legal for use as
472 * <code>{@link ProcessingInstruction}</code> data. Besides checking that
473 * all the characters are allowed in XML, this also checks
474 * that the data does not contain the PI end-string "?&gt;".
475 *
476 * @param data <code>String</code> data to check.
477 * @return <code>String</code> reason data is illegal, or
478 * <code>null</code> if data is OK.
479 */
480 public static String checkProcessingInstructionData(String data) {
481 // Check basic XML name rules first
482 String reason = checkCharacterData(data);
483
484 if (reason == null) {
485 if (data.indexOf("?>") >= 0) {
486 return "Processing instructions cannot contain " +
487 "the string \"?>\"";
488 }
489 }
490
491 return reason;
492 }
493
494 /**
495 * This will check the supplied data to see if it is legal for use as
496 * JDOM <code>{@link Comment}</code> data.
497 *
498 * @param data <code>String</code> data to check.
499 * @return <code>String</code> reason data is illegal, or
500 * <code>null</code> if data is OK.
501 */
502 public static String checkCommentData(String data) {
503 String reason = null;
504 if ((reason = checkCharacterData(data)) != null) {
505 return reason;
506 }
507
508 if (data.indexOf("--") != -1) {
509 return "Comments cannot contain double hyphens (--)";
510 }
511 if (data.endsWith("-")) {
512 return "Comment data cannot end with a hyphen.";
513 }
514
515 // If we got here, everything is OK
516 return null;
517 }
518 /**
519 * This is a utility function to decode a non-BMP
520 * UTF-16 surrogate pair.
521 * @param high high 16 bits
522 * @param low low 16 bits
523 * @return decoded character
524 */
525 public static int decodeSurrogatePair(char high, char low) {
526 return 0x10000 + (high - 0xD800) * 0x400 + (low - 0xDC00);
527 }
528
529 // [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] |
530 // [-'()+,./:=?;!*#@$_%]
531 public static boolean isXMLPublicIDCharacter(char c) {
532
533 if (c >= 'a' && c <= 'z') return true;
534 if (c >= '?' && c <= 'Z') return true;
535 if (c >= '\'' && c <= ';') return true;
536
537 if (c == ' ') return true;
538 if (c == '!') return true;
539 if (c == '=') return true;
540 if (c == '#') return true;
541 if (c == '$') return true;
542 if (c == '_') return true;
543 if (c == '%') return true;
544 if (c == '\n') return true;
545 if (c == '\r') return true;
546 if (c == '\t') return true;
547
548 return false;
549 }
550
551 /**
552 * This will ensure that the data for a public identifier
553 * is legal.
554 *
555 * @param publicID <code>String</code> public ID to check.
556 * @return <code>String</code> reason public ID is illegal, or
557 * <code>null</code> if public ID is OK.
558 */
559 public static String checkPublicID(String publicID) {
560 String reason = null;
561
562 if (publicID == null) return null;
563 // This indicates there is no public ID
564
565 for (int i = 0; i < publicID.length(); i++) {
566 char c = publicID.charAt(i);
567 if (!isXMLPublicIDCharacter(c)) {
568 reason = c + " is not a legal character in public IDs";
569 break;
570 }
571 }
572
573 return reason;
574 }
575
576
577 /**
578 * This will ensure that the data for a system literal
579 * is legal.
580 *
581 * @param systemLiteral <code>String</code> system literal to check.
582 * @return <code>String</code> reason system literal is illegal, or
583 * <code>null</code> if system literal is OK.
584 */
585 public static String checkSystemLiteral(String systemLiteral) {
586 String reason = null;
587
588 if (systemLiteral == null) return null;
589 // This indicates there is no system ID
590
591 if (systemLiteral.indexOf('\'') != -1
592 && systemLiteral.indexOf('"') != -1) {
593 reason =
594 "System literals cannot simultaneously contain both single and double quotes.";
595 }
596 else {
597 reason = checkCharacterData(systemLiteral);
598 }
599
600 return reason;
601 }
602
603 /**
604 * This is a utility function for sharing the base process of checking
605 * any XML name.
606 *
607 * @param name <code>String</code> to check for XML name compliance.
608 * @return <code>String</code> reason the name is illegal, or
609 * <code>null</code> if OK.
610 */
611 public static String checkXMLName(String name) {
612 // Cannot be empty or null
613 if ((name == null) || (name.length() == 0)
614 || (name.trim().equals(""))) {
615 return "XML names cannot be null or empty";
616 }
617
618
619 // Cannot start with a number
620 char first = name.charAt(0);
621 if (!isXMLNameStartCharacter(first)) {
622 return "XML names cannot begin with the character \"" +
623 first + "\"";
624 }
625 // Ensure legal content for non-first chars
626 for (int i=1, len = name.length(); i<len; i++) {
627 char c = name.charAt(i);
628 if (!isXMLNameCharacter(c)) {
629 return "XML names cannot contain the character \"" + c + "\"";
630 }
631 }
632
633 // We got here, so everything is OK
634 return null;
635 }
636
637 /**
638 * <p>
639 * Checks a string to see if it is a legal RFC 2396 URI.
640 * Both absolute and relative URIs are supported.
641 * </p>
642 *
643 * @param uri <code>String</code> to check.
644 * @return <code>String</code> reason the URI is illegal, or
645 * <code>null</code> if OK.
646 */
647 public static String checkURI(String uri) {
648 // URIs can be null or empty
649 if ((uri == null) || (uri.equals(""))) {
650 return null;
651 }
652
653 for (int i = 0; i < uri.length(); i++) {
654 char test = uri.charAt(i);
655 if (!isURICharacter(test)) {
656 String msgNumber = "0x" + Integer.toHexString(test);
657 if (test <= 0x09) msgNumber = "0x0" + Integer.toHexString(test);
658 return "URIs cannot contain " + msgNumber;
659 } // end if
660 if (test == '%') { // must be followed by two hexadecimal digits
661 try {
662 char firstDigit = uri.charAt(i+1);
663 char secondDigit = uri.charAt(i+2);
664 if (!isHexDigit(firstDigit) ||
665 !isHexDigit(secondDigit)) {
666 return "Percent signs in URIs must be followed by "
667 + "exactly two hexadecimal digits.";
668 }
669
670 }
671 catch (StringIndexOutOfBoundsException e) {
672 return "Percent signs in URIs must be followed by "
673 + "exactly two hexadecimal digits.";
674 }
675 }
676 } // end for
677
678 // If we got here, everything is OK
679 return null;
680 }
681
682 /**
683 * <p>
684 * This is a utility function for determining whether a specified
685 * Unicode character is a hexadecimal digit as defined in RFC 2396;
686 * that is, one of the ASCII characters 0-9, a-f, or A-F.
687 * </p>
688 *
689 * @param c to check for hex digit.
690 * @return true if it's allowed, false otherwise.
691 */
692 public static boolean isHexDigit(char c) {
693
694 // I suspect most characters passed to this method will be
695 // correct hexadecimal digits, so I test for the true cases
696 // first. If this proves to be a performance bottleneck
697 // a switch statement or lookup table
698 // might optimize this.
699 if (c >= '0' && c <= '9') return true;
700 if (c >= 'A' && c <= 'F') return true;
701 if (c >= 'a' && c <= 'f') return true;
702
703 return false;
704 }
705
706 /**
707 * This is a function for determining whether the
708 * specified character is the high 16 bits in a
709 * UTF-16 surrogate pair.
710 * @param ch character to check
711 * @return true if the character is a high surrogate, false otherwise
712 */
713 public static boolean isHighSurrogate(char ch) {
714 return (ch >= 0xD800 && ch <= 0xDBFF);
715 }
716
717 /**
718 * This is a function for determining whether the
719 * specified character is the low 16 bits in a
720 * UTF-16 surrogate pair.
721 * @param ch character to check
722 * @return true if the character is a low surrogate, false otherwise.
723 */
724 public static boolean isLowSurrogate(char ch) {
725 return (ch >= 0xDC00 && ch <= 0xDFFF);
726 }
727
728 /**
729 * <p>
730 * This is a utility function for determining whether
731 * a specified Unicode character is legal in URI references
732 * as determined by RFC 2396.
733 * </p>
734 *
735 * @param c <code>char</code> to check for URI reference compliance.
736 * @return true if it's allowed, false otherwise.
737 */
738 public static boolean isURICharacter(char c) {
739 if (c >= 'a' && c <= 'z') return true;
740 if (c >= 'A' && c <= 'Z') return true;
741 if (c >= '0' && c <= '9') return true;
742 if (c == '/') return true;
743 if (c == '-') return true;
744 if (c == '.') return true;
745 if (c == '?') return true;
746 if (c == ':') return true;
747 if (c == '@') return true;
748 if (c == '&') return true;
749 if (c == '=') return true;
750 if (c == '+') return true;
751 if (c == '$') return true;
752 if (c == ',') return true;
753 if (c == '%') return true;
754
755 if (c == '_') return true;
756 if (c == '!') return true;
757 if (c == '~') return true;
758 if (c == '*') return true;
759 if (c == '\'') return true;
760 if (c == '(') return true;
761 if (c == ')') return true;
762 return false;
763 }
764
765 /**
766 * This is a utility function for determining whether a specified
767 * character is a character according to production 2 of the
768 * XML 1.0 specification.
769 *
770 * @param c <code>int</code> character value to check for XML compliance
771 * @return <code>boolean</code> true if it's a character,
772 * false otherwise
773 */
774 public static boolean isXMLCharacter(int c) {
775
776 if (c == '\n') return true;
777 if (c == '\r') return true;
778 if (c == '\t') return true;
779
780 if (c < 0x20) return false; if (c <= 0xD7FF) return true;
781 if (c < 0xE000) return false; if (c <= 0xFFFD) return true;
782 if (c < 0x10000) return false; if (c <= 0x10FFFF) return true;
783
784 return false;
785 }
786
787
788 /**
789 * This is a utility function for determining whether a specified
790 * character is a name character according to production 4 of the
791 * XML 1.0 specification.
792 *
793 * @param c <code>char</code> to check for XML name compliance.
794 * @return <code>boolean</code> true if it's a name character,
795 * false otherwise.
796 */
797 public static boolean isXMLNameCharacter(char c) {
798
799 return (isXMLLetter(c) || isXMLDigit(c) || c == '.' || c == '-'
800 || c == '_' || c == ':' || isXMLCombiningChar(c)
801 || isXMLExtender(c));
802 }
803
804 /**
805 * This is a utility function for determining whether a specified
806 * character is a legal name start character according to production 5
807 * of the XML 1.0 specification. This production does allow names
808 * to begin with colons which the Namespaces in XML Recommendation
809 * disallows.
810 *
811 * @param c <code>char</code> to check for XML name start compliance.
812 * @return <code>boolean</code> true if it's a name start character,
813 * false otherwise.
814 */
815 public static boolean isXMLNameStartCharacter(char c) {
816
817 return (isXMLLetter(c) || c == '_' || c ==':');
818
819 }
820
821 /**
822 * This is a utility function for determining whether a specified
823 * character is a letter or digit according to productions 84 and 88
824 * of the XML 1.0 specification.
825 *
826 * @param c <code>char</code> to check.
827 * @return <code>boolean</code> true if it's letter or digit,
828 * false otherwise.
829 */
830 public static boolean isXMLLetterOrDigit(char c) {
831
832 return (isXMLLetter(c) || isXMLDigit(c));
833
834 }
835
836 /**
837 * This is a utility function for determining whether a specified character
838 * is a letter according to production 84 of the XML 1.0 specification.
839 *
840 * @param c <code>char</code> to check for XML name compliance.
841 * @return <code>boolean</code> true if it's a letter, false otherwise.
842 */
843 public static boolean isXMLLetter(char c) {
844 // Note that order is very important here. The search proceeds
845 // from lowest to highest values, so that no searching occurs
846 // above the character's value. BTW, the first line is equivalent to:
847 // if (c >= 0x0041 && c <= 0x005A) return true;
848
849 if (c < 0x0041) return false; if (c <= 0x005a) return true;
850 if (c < 0x0061) return false; if (c <= 0x007A) return true;
851 if (c < 0x00C0) return false; if (c <= 0x00D6) return true;
852 if (c < 0x00D8) return false; if (c <= 0x00F6) return true;
853 if (c < 0x00F8) return false; if (c <= 0x00FF) return true;
854 if (c < 0x0100) return false; if (c <= 0x0131) return true;
855 if (c < 0x0134) return false; if (c <= 0x013E) return true;
856 if (c < 0x0141) return false; if (c <= 0x0148) return true;
857 if (c < 0x014A) return false; if (c <= 0x017E) return true;
858 if (c < 0x0180) return false; if (c <= 0x01C3) return true;
859 if (c < 0x01CD) return false; if (c <= 0x01F0) return true;
860 if (c < 0x01F4) return false; if (c <= 0x01F5) return true;
861 if (c < 0x01FA) return false; if (c <= 0x0217) return true;
862 if (c < 0x0250) return false; if (c <= 0x02A8) return true;
863 if (c < 0x02BB) return false; if (c <= 0x02C1) return true;
864 if (c == 0x0386) return true;
865 if (c < 0x0388) return false; if (c <= 0x038A) return true;
866 if (c == 0x038C) return true;
867 if (c < 0x038E) return false; if (c <= 0x03A1) return true;
868 if (c < 0x03A3) return false; if (c <= 0x03CE) return true;
869 if (c < 0x03D0) return false; if (c <= 0x03D6) return true;
870 if (c == 0x03DA) return true;
871 if (c == 0x03DC) return true;
872 if (c == 0x03DE) return true;
873 if (c == 0x03E0) return true;
874 if (c < 0x03E2) return false; if (c <= 0x03F3) return true;
875 if (c < 0x0401) return false; if (c <= 0x040C) return true;
876 if (c < 0x040E) return false; if (c <= 0x044F) return true;
877 if (c < 0x0451) return false; if (c <= 0x045C) return true;
878 if (c < 0x045E) return false; if (c <= 0x0481) return true;
879 if (c < 0x0490) return false; if (c <= 0x04C4) return true;
880 if (c < 0x04C7) return false; if (c <= 0x04C8) return true;
881 if (c < 0x04CB) return false; if (c <= 0x04CC) return true;
882 if (c < 0x04D0) return false; if (c <= 0x04EB) return true;
883 if (c < 0x04EE) return false; if (c <= 0x04F5) return true;
884 if (c < 0x04F8) return false; if (c <= 0x04F9) return true;
885 if (c < 0x0531) return false; if (c <= 0x0556) return true;
886 if (c == 0x0559) return true;
887 if (c < 0x0561) return false; if (c <= 0x0586) return true;
888 if (c < 0x05D0) return false; if (c <= 0x05EA) return true;
889 if (c < 0x05F0) return false; if (c <= 0x05F2) return true;
890 if (c < 0x0621) return false; if (c <= 0x063A) return true;
891 if (c < 0x0641) return false; if (c <= 0x064A) return true;
892 if (c < 0x0671) return false; if (c <= 0x06B7) return true;
893 if (c < 0x06BA) return false; if (c <= 0x06BE) return true;
894 if (c < 0x06C0) return false; if (c <= 0x06CE) return true;
895 if (c < 0x06D0) return false; if (c <= 0x06D3) return true;
896 if (c == 0x06D5) return true;
897 if (c < 0x06E5) return false; if (c <= 0x06E6) return true;
898 if (c < 0x0905) return false; if (c <= 0x0939) return true;
899 if (c == 0x093D) return true;
900 if (c < 0x0958) return false; if (c <= 0x0961) return true;
901 if (c < 0x0985) return false; if (c <= 0x098C) return true;
902 if (c < 0x098F) return false; if (c <= 0x0990) return true;
903 if (c < 0x0993) return false; if (c <= 0x09A8) return true;
904 if (c < 0x09AA) return false; if (c <= 0x09B0) return true;
905 if (c == 0x09B2) return true;
906 if (c < 0x09B6) return false; if (c <= 0x09B9) return true;
907 if (c < 0x09DC) return false; if (c <= 0x09DD) return true;
908 if (c < 0x09DF) return false; if (c <= 0x09E1) return true;
909 if (c < 0x09F0) return false; if (c <= 0x09F1) return true;
910 if (c < 0x0A05) return false; if (c <= 0x0A0A) return true;
911 if (c < 0x0A0F) return false; if (c <= 0x0A10) return true;
912 if (c < 0x0A13) return false; if (c <= 0x0A28) return true;
913 if (c < 0x0A2A) return false; if (c <= 0x0A30) return true;
914 if (c < 0x0A32) return false; if (c <= 0x0A33) return true;
915 if (c < 0x0A35) return false; if (c <= 0x0A36) return true;
916 if (c < 0x0A38) return false; if (c <= 0x0A39) return true;
917 if (c < 0x0A59) return false; if (c <= 0x0A5C) return true;
918 if (c == 0x0A5E) return true;
919 if (c < 0x0A72) return false; if (c <= 0x0A74) return true;
920 if (c < 0x0A85) return false; if (c <= 0x0A8B) return true;
921 if (c == 0x0A8D) return true;
922 if (c < 0x0A8F) return false; if (c <= 0x0A91) return true;
923 if (c < 0x0A93) return false; if (c <= 0x0AA8) return true;
924 if (c < 0x0AAA) return false; if (c <= 0x0AB0) return true;
925 if (c < 0x0AB2) return false; if (c <= 0x0AB3) return true;
926 if (c < 0x0AB5) return false; if (c <= 0x0AB9) return true;
927 if (c == 0x0ABD) return true;
928 if (c == 0x0AE0) return true;
929 if (c < 0x0B05) return false; if (c <= 0x0B0C) return true;
930 if (c < 0x0B0F) return false; if (c <= 0x0B10) return true;
931 if (c < 0x0B13) return false; if (c <= 0x0B28) return true;
932 if (c < 0x0B2A) return false; if (c <= 0x0B30) return true;
933 if (c < 0x0B32) return false; if (c <= 0x0B33) return true;
934 if (c < 0x0B36) return false; if (c <= 0x0B39) return true;
935 if (c == 0x0B3D) return true;
936 if (c < 0x0B5C) return false; if (c <= 0x0B5D) return true;
937 if (c < 0x0B5F) return false; if (c <= 0x0B61) return true;
938 if (c < 0x0B85) return false; if (c <= 0x0B8A) return true;
939 if (c < 0x0B8E) return false; if (c <= 0x0B90) return true;
940 if (c < 0x0B92) return false; if (c <= 0x0B95) return true;
941 if (c < 0x0B99) return false; if (c <= 0x0B9A) return true;
942 if (c == 0x0B9C) return true;
943 if (c < 0x0B9E) return false; if (c <= 0x0B9F) return true;
944 if (c < 0x0BA3) return false; if (c <= 0x0BA4) return true;
945 if (c < 0x0BA8) return false; if (c <= 0x0BAA) return true;
946 if (c < 0x0BAE) return false; if (c <= 0x0BB5) return true;
947 if (c < 0x0BB7) return false; if (c <= 0x0BB9) return true;
948 if (c < 0x0C05) return false; if (c <= 0x0C0C) return true;
949 if (c < 0x0C0E) return false; if (c <= 0x0C10) return true;
950 if (c < 0x0C12) return false; if (c <= 0x0C28) return true;
951 if (c < 0x0C2A) return false; if (c <= 0x0C33) return true;
952 if (c < 0x0C35) return false; if (c <= 0x0C39) return true;
953 if (c < 0x0C60) return false; if (c <= 0x0C61) return true;
954 if (c < 0x0C85) return false; if (c <= 0x0C8C) return true;
955 if (c < 0x0C8E) return false; if (c <= 0x0C90) return true;
956 if (c < 0x0C92) return false; if (c <= 0x0CA8) return true;
957 if (c < 0x0CAA) return false; if (c <= 0x0CB3) return true;
958 if (c < 0x0CB5) return false; if (c <= 0x0CB9) return true;
959 if (c == 0x0CDE) return true;
960 if (c < 0x0CE0) return false; if (c <= 0x0CE1) return true;
961 if (c < 0x0D05) return false; if (c <= 0x0D0C) return true;
962 if (c < 0x0D0E) return false; if (c <= 0x0D10) return true;
963 if (c < 0x0D12) return false; if (c <= 0x0D28) return true;
964 if (c < 0x0D2A) return false; if (c <= 0x0D39) return true;
965 if (c < 0x0D60) return false; if (c <= 0x0D61) return true;
966 if (c < 0x0E01) return false; if (c <= 0x0E2E) return true;
967 if (c == 0x0E30) return true;
968 if (c < 0x0E32) return false; if (c <= 0x0E33) return true;
969 if (c < 0x0E40) return false; if (c <= 0x0E45) return true;
970 if (c < 0x0E81) return false; if (c <= 0x0E82) return true;
971 if (c == 0x0E84) return true;
972 if (c < 0x0E87) return false; if (c <= 0x0E88) return true;
973 if (c == 0x0E8A) return true;
974 if (c == 0x0E8D) return true;
975 if (c < 0x0E94) return false; if (c <= 0x0E97) return true;
976 if (c < 0x0E99) return false; if (c <= 0x0E9F) return true;
977 if (c < 0x0EA1) return false; if (c <= 0x0EA3) return true;
978 if (c == 0x0EA5) return true;
979 if (c == 0x0EA7) return true;
980 if (c < 0x0EAA) return false; if (c <= 0x0EAB) return true;
981 if (c < 0x0EAD) return false; if (c <= 0x0EAE) return true;
982 if (c == 0x0EB0) return true;
983 if (c < 0x0EB2) return false; if (c <= 0x0EB3) return true;
984 if (c == 0x0EBD) return true;
985 if (c < 0x0EC0) return false; if (c <= 0x0EC4) return true;
986 if (c < 0x0F40) return false; if (c <= 0x0F47) return true;
987 if (c < 0x0F49) return false; if (c <= 0x0F69) return true;
988 if (c < 0x10A0) return false; if (c <= 0x10C5) return true;
989 if (c < 0x10D0) return false; if (c <= 0x10F6) return true;
990 if (c == 0x1100) return true;
991 if (c < 0x1102) return false; if (c <= 0x1103) return true;
992 if (c < 0x1105) return false; if (c <= 0x1107) return true;
993 if (c == 0x1109) return true;
994 if (c < 0x110B) return false; if (c <= 0x110C) return true;
995 if (c < 0x110E) return false; if (c <= 0x1112) return true;
996 if (c == 0x113C) return true;
997 if (c == 0x113E) return true;
998 if (c == 0x1140) return true;
999 if (c == 0x114C) return true;
1000 if (c == 0x114E) return true;
1001 if (c == 0x1150) return true;
1002 if (c < 0x1154) return false; if (c <= 0x1155) return true;
1003 if (c == 0x1159) return true;
1004 if (c < 0x115F) return false; if (c <= 0x1161) return true;
1005 if (c == 0x1163) return true;
1006 if (c == 0x1165) return true;
1007 if (c == 0x1167) return true;
1008 if (c == 0x1169) return true;
1009 if (c < 0x116D) return false; if (c <= 0x116E) return true;
1010 if (c < 0x1172) return false; if (c <= 0x1173) return true;
1011 if (c == 0x1175) return true;
1012 if (c == 0x119E) return true;
1013 if (c == 0x11A8) return true;
1014 if (c == 0x11AB) return true;
1015 if (c < 0x11AE) return false; if (c <= 0x11AF) return true;
1016 if (c < 0x11B7) return false; if (c <= 0x11B8) return true;
1017 if (c == 0x11BA) return true;
1018 if (c < 0x11BC) return false; if (c <= 0x11C2) return true;
1019 if (c == 0x11EB) return true;
1020 if (c == 0x11F0) return true;
1021 if (c == 0x11F9) return true;
1022 if (c < 0x1E00) return false; if (c <= 0x1E9B) return true;
1023 if (c < 0x1EA0) return false; if (c <= 0x1EF9) return true;
1024 if (c < 0x1F00) return false; if (c <= 0x1F15) return true;
1025 if (c < 0x1F18) return false; if (c <= 0x1F1D) return true;
1026 if (c < 0x1F20) return false; if (c <= 0x1F45) return true;
1027 if (c < 0x1F48) return false; if (c <= 0x1F4D) return true;
1028 if (c < 0x1F50) return false; if (c <= 0x1F57) return true;
1029 if (c == 0x1F59) return true;
1030 if (c == 0x1F5B) return true;
1031 if (c == 0x1F5D) return true;
1032 if (c < 0x1F5F) return false; if (c <= 0x1F7D) return true;
1033 if (c < 0x1F80) return false; if (c <= 0x1FB4) return true;
1034 if (c < 0x1FB6) return false; if (c <= 0x1FBC) return true;
1035 if (c == 0x1FBE) return true;
1036 if (c < 0x1FC2) return false; if (c <= 0x1FC4) return true;
1037 if (c < 0x1FC6) return false; if (c <= 0x1FCC) return true;
1038 if (c < 0x1FD0) return false; if (c <= 0x1FD3) return true;
1039 if (c < 0x1FD6) return false; if (c <= 0x1FDB) return true;
1040 if (c < 0x1FE0) return false; if (c <= 0x1FEC) return true;
1041 if (c < 0x1FF2) return false; if (c <= 0x1FF4) return true;
1042 if (c < 0x1FF6) return false; if (c <= 0x1FFC) return true;
1043 if (c == 0x2126) return true;
1044 if (c < 0x212A) return false; if (c <= 0x212B) return true;
1045 if (c == 0x212E) return true;
1046 if (c < 0x2180) return false; if (c <= 0x2182) return true;
1047 if (c == 0x3007) return true; // ideographic
1048 if (c < 0x3021) return false; if (c <= 0x3029) return true; // ideo
1049 if (c < 0x3041) return false; if (c <= 0x3094) return true;
1050 if (c < 0x30A1) return false; if (c <= 0x30FA) return true;
1051 if (c < 0x3105) return false; if (c <= 0x312C) return true;
1052 if (c < 0x4E00) return false; if (c <= 0x9FA5) return true; // ideo
1053 if (c < 0xAC00) return false; if (c <= 0xD7A3) return true;
1054
1055 return false;
1056
1057 }
1058
1059 /**
1060 * This is a utility function for determining whether a specified character
1061 * is a combining character according to production 87
1062 * of the XML 1.0 specification.
1063 *
1064 * @param c <code>char</code> to check.
1065 * @return <code>boolean</code> true if it's a combining character,
1066 * false otherwise.
1067 */
1068 public static boolean isXMLCombiningChar(char c) {
1069 // CombiningChar
1070 if (c < 0x0300) return false; if (c <= 0x0345) return true;
1071 if (c < 0x0360) return false; if (c <= 0x0361) return true;
1072 if (c < 0x0483) return false; if (c <= 0x0486) return true;
1073 if (c < 0x0591) return false; if (c <= 0x05A1) return true;
1074
1075 if (c < 0x05A3) return false; if (c <= 0x05B9) return true;
1076 if (c < 0x05BB) return false; if (c <= 0x05BD) return true;
1077 if (c == 0x05BF) return true;
1078 if (c < 0x05C1) return false; if (c <= 0x05C2) return true;
1079
1080 if (c == 0x05C4) return true;
1081 if (c < 0x064B) return false; if (c <= 0x0652) return true;
1082 if (c == 0x0670) return true;
1083 if (c < 0x06D6) return false; if (c <= 0x06DC) return true;
1084
1085 if (c < 0x06DD) return false; if (c <= 0x06DF) return true;
1086 if (c < 0x06E0) return false; if (c <= 0x06E4) return true;
1087 if (c < 0x06E7) return false; if (c <= 0x06E8) return true;
1088
1089 if (c < 0x06EA) return false; if (c <= 0x06ED) return true;
1090 if (c < 0x0901) return false; if (c <= 0x0903) return true;
1091 if (c == 0x093C) return true;
1092 if (c < 0x093E) return false; if (c <= 0x094C) return true;
1093
1094 if (c == 0x094D) return true;
1095 if (c < 0x0951) return false; if (c <= 0x0954) return true;
1096 if (c < 0x0962) return false; if (c <= 0x0963) return true;
1097 if (c < 0x0981) return false; if (c <= 0x0983) return true;
1098
1099 if (c == 0x09BC) return true;
1100 if (c == 0x09BE) return true;
1101 if (c == 0x09BF) return true;
1102 if (c < 0x09C0) return false; if (c <= 0x09C4) return true;
1103 if (c < 0x09C7) return false; if (c <= 0x09C8) return true;
1104
1105 if (c < 0x09CB) return false; if (c <= 0x09CD) return true;
1106 if (c == 0x09D7) return true;
1107 if (c < 0x09E2) return false; if (c <= 0x09E3) return true;
1108 if (c == 0x0A02) return true;
1109 if (c == 0x0A3C) return true;
1110
1111 if (c == 0x0A3E) return true;
1112 if (c == 0x0A3F) return true;
1113 if (c < 0x0A40) return false; if (c <= 0x0A42) return true;
1114 if (c < 0x0A47) return false; if (c <= 0x0A48) return true;
1115
1116 if (c < 0x0A4B) return false; if (c <= 0x0A4D) return true;
1117 if (c < 0x0A70) return false; if (c <= 0x0A71) return true;
1118 if (c < 0x0A81) return false; if (c <= 0x0A83) return true;
1119 if (c == 0x0ABC) return true;
1120
1121 if (c < 0x0ABE) return false; if (c <= 0x0AC5) return true;
1122 if (c < 0x0AC7) return false; if (c <= 0x0AC9) return true;
1123 if (c < 0x0ACB) return false; if (c <= 0x0ACD) return true;
1124
1125 if (c < 0x0B01) return false; if (c <= 0x0B03) return true;
1126 if (c == 0x0B3C) return true;
1127 if (c < 0x0B3E) return false; if (c <= 0x0B43) return true;
1128 if (c < 0x0B47) return false; if (c <= 0x0B48) return true;
1129
1130 if (c < 0x0B4B) return false; if (c <= 0x0B4D) return true;
1131 if (c < 0x0B56) return false; if (c <= 0x0B57) return true;
1132 if (c < 0x0B82) return false; if (c <= 0x0B83) return true;
1133
1134 if (c < 0x0BBE) return false; if (c <= 0x0BC2) return true;
1135 if (c < 0x0BC6) return false; if (c <= 0x0BC8) return true;
1136 if (c < 0x0BCA) return false; if (c <= 0x0BCD) return true;
1137 if (c == 0x0BD7) return true;
1138
1139 if (c < 0x0C01) return false; if (c <= 0x0C03) return true;
1140 if (c < 0x0C3E) return false; if (c <= 0x0C44) return true;
1141 if (c < 0x0C46) return false; if (c <= 0x0C48) return true;
1142
1143 if (c < 0x0C4A) return false; if (c <= 0x0C4D) return true;
1144 if (c < 0x0C55) return false; if (c <= 0x0C56) return true;
1145 if (c < 0x0C82) return false; if (c <= 0x0C83) return true;
1146
1147 if (c < 0x0CBE) return false; if (c <= 0x0CC4) return true;
1148 if (c < 0x0CC6) return false; if (c <= 0x0CC8) return true;
1149 if (c < 0x0CCA) return false; if (c <= 0x0CCD) return true;
1150
1151 if (c < 0x0CD5) return false; if (c <= 0x0CD6) return true;
1152 if (c < 0x0D02) return false; if (c <= 0x0D03) return true;
1153 if (c < 0x0D3E) return false; if (c <= 0x0D43) return true;
1154
1155 if (c < 0x0D46) return false; if (c <= 0x0D48) return true;
1156 if (c < 0x0D4A) return false; if (c <= 0x0D4D) return true;
1157 if (c == 0x0D57) return true;
1158 if (c == 0x0E31) return true;
1159
1160 if (c < 0x0E34) return false; if (c <= 0x0E3A) return true;
1161 if (c < 0x0E47) return false; if (c <= 0x0E4E) return true;
1162 if (c == 0x0EB1) return true;
1163 if (c < 0x0EB4) return false; if (c <= 0x0EB9) return true;
1164
1165 if (c < 0x0EBB) return false; if (c <= 0x0EBC) return true;
1166 if (c < 0x0EC8) return false; if (c <= 0x0ECD) return true;
1167 if (c < 0x0F18) return false; if (c <= 0x0F19) return true;
1168 if (c == 0x0F35) return true;
1169
1170 if (c == 0x0F37) return true;
1171 if (c == 0x0F39) return true;
1172 if (c == 0x0F3E) return true;
1173 if (c == 0x0F3F) return true;
1174 if (c < 0x0F71) return false; if (c <= 0x0F84) return true;
1175
1176 if (c < 0x0F86) return false; if (c <= 0x0F8B) return true;
1177 if (c < 0x0F90) return false; if (c <= 0x0F95) return true;
1178 if (c == 0x0F97) return true;
1179 if (c < 0x0F99) return false; if (c <= 0x0FAD) return true;
1180
1181 if (c < 0x0FB1) return false; if (c <= 0x0FB7) return true;
1182 if (c == 0x0FB9) return true;
1183 if (c < 0x20D0) return false; if (c <= 0x20DC) return true;
1184 if (c == 0x20E1) return true;
1185
1186 if (c < 0x302A) return false; if (c <= 0x302F) return true;
1187 if (c == 0x3099) return true;
1188 if (c == 0x309A) return true;
1189
1190 return false;
1191
1192 }
1193
1194 /**
1195 * This is a utility function for determining whether a specified
1196 * character is an extender according to production 88 of the XML 1.0
1197 * specification.
1198 *
1199 * @param c <code>char</code> to check.
1200 * @return <code>String</code> true if it's an extender, false otherwise.
1201 */
1202 public static boolean isXMLExtender(char c) {
1203
1204 if (c < 0x00B6) return false; // quick short circuit
1205
1206 // Extenders
1207 if (c == 0x00B7) return true;
1208 if (c == 0x02D0) return true;
1209 if (c == 0x02D1) return true;
1210 if (c == 0x0387) return true;
1211 if (c == 0x0640) return true;
1212 if (c == 0x0E46) return true;
1213 if (c == 0x0EC6) return true;
1214 if (c == 0x3005) return true;
1215
1216 if (c < 0x3031) return false; if (c <= 0x3035) return true;
1217 if (c < 0x309D) return false; if (c <= 0x309E) return true;
1218 if (c < 0x30FC) return false; if (c <= 0x30FE) return true;
1219
1220 return false;
1221
1222 }
1223
1224 /**
1225 * This is a utility function for determining whether a specified
1226 * Unicode character
1227 * is a digit according to production 88 of the XML 1.0 specification.
1228 *
1229 * @param c <code>char</code> to check for XML digit compliance
1230 * @return <code>boolean</code> true if it's a digit, false otherwise
1231 */
1232 public static boolean isXMLDigit(char c) {
1233
1234 if (c < 0x0030) return false; if (c <= 0x0039) return true;
1235 if (c < 0x0660) return false; if (c <= 0x0669) return true;
1236 if (c < 0x06F0) return false; if (c <= 0x06F9) return true;
1237 if (c < 0x0966) return false; if (c <= 0x096F) return true;
1238
1239 if (c < 0x09E6) return false; if (c <= 0x09EF) return true;
1240 if (c < 0x0A66) return false; if (c <= 0x0A6F) return true;
1241 if (c < 0x0AE6) return false; if (c <= 0x0AEF) return true;
1242
1243 if (c < 0x0B66) return false; if (c <= 0x0B6F) return true;
1244 if (c < 0x0BE7) return false; if (c <= 0x0BEF) return true;
1245 if (c < 0x0C66) return false; if (c <= 0x0C6F) return true;
1246
1247 if (c < 0x0CE6) return false; if (c <= 0x0CEF) return true;
1248 if (c < 0x0D66) return false; if (c <= 0x0D6F) return true;
1249 if (c < 0x0E50) return false; if (c <= 0x0E59) return true;
1250
1251 if (c < 0x0ED0) return false; if (c <= 0x0ED9) return true;
1252 if (c < 0x0F20) return false; if (c <= 0x0F29) return true;
1253
1254 return false;
1255 }
1256
1257 /**
1258 * This is a utility function for determining whether a specified
1259 * Unicode character is a whitespace character according to production 3
1260 * of the XML 1.0 specification.
1261 *
1262 * @param c <code>char</code> to check for XML whitespace compliance
1263 * @return <code>boolean</code> true if it's a whitespace, false otherwise
1264 */
1265 public static boolean isXMLWhitespace(char c) {
1266 if (c==' ' || c=='\n' || c=='\t' || c=='\r' ){
1267 return true;
1268 }
1269 return false;
1270 }
1271 }