0
|
1 /*--
|
|
2
|
|
3 $Id: Verifier.java,v 1.57 2009/07/23 05:54:23 jhunter Exp $
|
|
4
|
|
5 Copyright (C) 2000-2007 Jason Hunter & Brett McLaughlin.
|
|
6 All rights reserved.
|
|
7
|
|
8 Redistribution and use in source and binary forms, with or without
|
|
9 modification, are permitted provided that the following conditions
|
|
10 are met:
|
|
11
|
|
12 1. Redistributions of source code must retain the above copyright
|
|
13 notice, this list of conditions, and the following disclaimer.
|
|
14
|
|
15 2. Redistributions in binary form must reproduce the above copyright
|
|
16 notice, this list of conditions, and the disclaimer that follows
|
|
17 these conditions in the documentation and/or other materials
|
|
18 provided with the distribution.
|
|
19
|
|
20 3. The name "JDOM" must not be used to endorse or promote products
|
|
21 derived from this software without prior written permission. For
|
|
22 written permission, please contact <request_AT_jdom_DOT_org>.
|
|
23
|
|
24 4. Products derived from this software may not be called "JDOM", nor
|
|
25 may "JDOM" appear in their name, without prior written permission
|
|
26 from the JDOM Project Management <request_AT_jdom_DOT_org>.
|
|
27
|
|
28 In addition, we request (but do not require) that you include in the
|
|
29 end-user documentation provided with the redistribution and/or in the
|
|
30 software itself an acknowledgement equivalent to the following:
|
|
31 "This product includes software developed by the
|
|
32 JDOM Project (http://www.jdom.org/)."
|
|
33 Alternatively, the acknowledgment may be graphical using the logos
|
|
34 available at http://www.jdom.org/images/logos.
|
|
35
|
|
36 THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
|
37 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
38 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
39 DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
|
|
40 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
41 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
42 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
|
43 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
44 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
45 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
|
46 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
47 SUCH DAMAGE.
|
|
48
|
|
49 This software consists of voluntary contributions made by many
|
|
50 individuals on behalf of the JDOM Project and was originally
|
|
51 created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
|
|
52 Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information
|
|
53 on the JDOM Project, please see <http://www.jdom.org/>.
|
|
54
|
|
55 */
|
|
56
|
|
57 package org.jdom;
|
|
58
|
|
59 import java.util.*;
|
|
60
|
|
61 /**
|
|
62 * A utility class to handle well-formedness checks on names, data, and other
|
|
63 * verification tasks for JDOM. The class is final and may not be subclassed.
|
|
64 *
|
|
65 * @version $Revision: 1.57 $, $Date: 2009/07/23 05:54:23 $
|
|
66 * @author Brett McLaughlin
|
|
67 * @author Elliotte Rusty Harold
|
|
68 * @author Jason Hunter
|
|
69 * @author Bradley S. Huffman
|
|
70 */
|
|
71 final public class Verifier {
|
|
72
|
|
73 private static final String CVS_ID =
|
|
74 "@(#) $RCSfile: Verifier.java,v $ $Revision: 1.57 $ $Date: 2009/07/23 05:54:23 $ $Name: jdom_1_1_1 $";
|
|
75
|
|
/**
 * Private constructor, so that no instance of this class can be
 * created from outside.
 */
private Verifier() {
    // intentionally empty
}
|
|
80
|
|
81 /**
|
|
82 * This will check the supplied name to see if it is legal for use as
|
|
83 * a JDOM <code>{@link Element}</code> name.
|
|
84 *
|
|
85 * @param name <code>String</code> name to check.
|
|
86 * @return <code>String</code> reason name is illegal, or
|
|
87 * <code>null</code> if name is OK.
|
|
88 */
|
|
89 public static String checkElementName(String name) {
|
|
90 // Check basic XML name rules first
|
|
91 String reason;
|
|
92 if ((reason = checkXMLName(name)) != null) {
|
|
93 return reason;
|
|
94 }
|
|
95
|
|
96 // No colons allowed, since elements handle this internally
|
|
97 if (name.indexOf(":") != -1) {
|
|
98 return "Element names cannot contain colons";
|
|
99 }
|
|
100
|
|
101 // If we got here, everything is OK
|
|
102 return null;
|
|
103 }
|
|
104
|
|
105 /**
|
|
106 * This will check the supplied name to see if it is legal for use as
|
|
107 * a JDOM <code>{@link Attribute}</code> name.
|
|
108 *
|
|
109 * @param name <code>String</code> name to check.
|
|
110 * @return <code>String</code> reason name is illegal, or
|
|
111 * <code>null</code> if name is OK.
|
|
112 */
|
|
113 public static String checkAttributeName(String name) {
|
|
114 // Check basic XML name rules first
|
|
115 String reason;
|
|
116 if ((reason = checkXMLName(name)) != null) {
|
|
117 return reason;
|
|
118 }
|
|
119
|
|
120 // No colons are allowed, since attributes handle this internally
|
|
121 if (name.indexOf(":") != -1) {
|
|
122 return "Attribute names cannot contain colons";
|
|
123 }
|
|
124
|
|
125 // Attribute names may not be xmlns since we do this internally too
|
|
126 if (name.equals("xmlns")) {
|
|
127 return "An Attribute name may not be \"xmlns\"; " +
|
|
128 "use the Namespace class to manage namespaces";
|
|
129 }
|
|
130
|
|
131 // If we got here, everything is OK
|
|
132 return null;
|
|
133 }
|
|
134
|
|
135 /**
|
|
136 * This will check the supplied string to see if it only contains
|
|
137 * characters allowed by the XML 1.0 specification. The C0 controls
|
|
138 * (e.g. null, vertical tab, formfeed, etc.) are specifically excluded
|
|
139 * except for carriage return, linefeed, and the horizontal tab.
|
|
140 * Surrogates are also excluded.
|
|
141 * <p>
|
|
142 * This method is useful for checking element content and attribute
|
|
143 * values. Note that characters
|
|
144 * like " and < are allowed in attribute values and element content.
|
|
145 * They will simply be escaped as " or <
|
|
146 * when the value is serialized.
|
|
147 * </p>
|
|
148 *
|
|
149 * @param text <code>String</code> value to check.
|
|
150 * @return <code>String</code> reason name is illegal, or
|
|
151 * <code>null</code> if name is OK.
|
|
152 */
|
|
153 public static String checkCharacterData(String text) {
|
|
154 if (text == null) {
|
|
155 return "A null is not a legal XML value";
|
|
156 }
|
|
157
|
|
158 // Do check
|
|
159 for (int i = 0, len = text.length(); i<len; i++) {
|
|
160
|
|
161 int ch = text.charAt(i);
|
|
162
|
|
163 // Check if high part of a surrogate pair
|
|
164 if (isHighSurrogate((char) ch)) {
|
|
165 // Check if next char is the low-surrogate
|
|
166 i++;
|
|
167 if (i < len) {
|
|
168 char low = text.charAt(i);
|
|
169 if (!isLowSurrogate(low)) {
|
|
170 return "Illegal Surrogate Pair";
|
|
171 }
|
|
172 // It's a good pair, calculate the true value of
|
|
173 // the character to then fall thru to isXMLCharacter
|
|
174 ch = decodeSurrogatePair((char) ch, low);
|
|
175 }
|
|
176 else {
|
|
177 return "Surrogate Pair Truncated";
|
|
178 }
|
|
179 }
|
|
180
|
|
181 if (!isXMLCharacter(ch)) {
|
|
182 // Likely this character can't be easily displayed
|
|
183 // because it's a control so we use it'd hexadecimal
|
|
184 // representation in the reason.
|
|
185 return ("0x" + Integer.toHexString(ch) +
|
|
186 " is not a legal XML character");
|
|
187 }
|
|
188 }
|
|
189
|
|
190 // If we got here, everything is OK
|
|
191 return null;
|
|
192 }
|
|
193
|
|
194 /**
|
|
195 * This will check the supplied data to see if it is legal for use as
|
|
196 * JDOM <code>{@link CDATA}</code>.
|
|
197 *
|
|
198 * @param data <code>String</code> data to check.
|
|
199 * @return <code>String</code> reason data is illegal, or
|
|
200 * <code>null</code> is name is OK.
|
|
201 */
|
|
202 public static String checkCDATASection(String data) {
|
|
203 String reason = null;
|
|
204 if ((reason = checkCharacterData(data)) != null) {
|
|
205 return reason;
|
|
206 }
|
|
207
|
|
208 if (data.indexOf("]]>") != -1) {
|
|
209 return "CDATA cannot internally contain a CDATA ending " +
|
|
210 "delimiter (]]>)";
|
|
211 }
|
|
212
|
|
213 // If we got here, everything is OK
|
|
214 return null;
|
|
215 }
|
|
216
|
|
217 /**
|
|
218 * This will check the supplied name to see if it is legal for use as
|
|
219 * a JDOM <code>{@link Namespace}</code> prefix.
|
|
220 *
|
|
221 * @param prefix <code>String</code> prefix to check.
|
|
222 * @return <code>String</code> reason name is illegal, or
|
|
223 * <code>null</code> if name is OK.
|
|
224 */
|
|
225 public static String checkNamespacePrefix(String prefix) {
|
|
226 // Manually do rules, since URIs can be null or empty
|
|
227 if ((prefix == null) || (prefix.equals(""))) {
|
|
228 return null;
|
|
229 }
|
|
230
|
|
231 // Cannot start with a number
|
|
232 char first = prefix.charAt(0);
|
|
233 if (isXMLDigit(first)) {
|
|
234 return "Namespace prefixes cannot begin with a number";
|
|
235 }
|
|
236 // Cannot start with a $
|
|
237 if (first == '$') {
|
|
238 return "Namespace prefixes cannot begin with a dollar sign ($)";
|
|
239 }
|
|
240 // Cannot start with a -
|
|
241 if (first == '-') {
|
|
242 return "Namespace prefixes cannot begin with a hyphen (-)";
|
|
243 }
|
|
244 // Cannot start with a .
|
|
245 if (first == '.') {
|
|
246 return "Namespace prefixes cannot begin with a period (.)";
|
|
247 }
|
|
248 // Cannot start with "xml" in any character case
|
|
249 if (prefix.toLowerCase().startsWith("xml")) {
|
|
250 return "Namespace prefixes cannot begin with " +
|
|
251 "\"xml\" in any combination of case";
|
|
252 }
|
|
253
|
|
254 // Ensure legal content
|
|
255 for (int i=0, len = prefix.length(); i<len; i++) {
|
|
256 char c = prefix.charAt(i);
|
|
257 if (!isXMLNameCharacter(c)) {
|
|
258 return "Namespace prefixes cannot contain the character \"" +
|
|
259 c + "\"";
|
|
260 }
|
|
261 }
|
|
262
|
|
263 // No colons allowed
|
|
264 if (prefix.indexOf(":") != -1) {
|
|
265 return "Namespace prefixes cannot contain colons";
|
|
266 }
|
|
267
|
|
268 // If we got here, everything is OK
|
|
269 return null;
|
|
270 }
|
|
271
|
|
272 /**
|
|
273 * This will check the supplied name to see if it is legal for use as
|
|
274 * a JDOM <code>{@link Namespace}</code> URI.
|
|
275 *
|
|
276 * @param uri <code>String</code> URI to check.
|
|
277 * @return <code>String</code> reason name is illegal, or
|
|
278 * <code>null</code> if name is OK.
|
|
279 */
|
|
280 public static String checkNamespaceURI(String uri) {
|
|
281 // Manually do rules, since URIs can be null or empty
|
|
282 if ((uri == null) || (uri.equals(""))) {
|
|
283 return null;
|
|
284 }
|
|
285
|
|
286 // Cannot start with a number
|
|
287 char first = uri.charAt(0);
|
|
288 if (Character.isDigit(first)) {
|
|
289 return "Namespace URIs cannot begin with a number";
|
|
290 }
|
|
291 // Cannot start with a $
|
|
292 if (first == '$') {
|
|
293 return "Namespace URIs cannot begin with a dollar sign ($)";
|
|
294 }
|
|
295 // Cannot start with a -
|
|
296 if (first == '-') {
|
|
297 return "Namespace URIs cannot begin with a hyphen (-)";
|
|
298 }
|
|
299
|
|
300 // If we got here, everything is OK
|
|
301 return null;
|
|
302 }
|
|
303
|
|
304 /**
|
|
305 * Check if two namespaces collide.
|
|
306 *
|
|
307 * @param namespace <code>Namespace</code> to check.
|
|
308 * @param other <code>Namespace</code> to check against.
|
|
309 * @return <code>String</code> reason for collision, or
|
|
310 * <code>null</code> if no collision.
|
|
311 */
|
|
312 public static String checkNamespaceCollision(Namespace namespace,
|
|
313 Namespace other) {
|
|
314 String p1,p2,u1,u2,reason;
|
|
315
|
|
316 reason = null;
|
|
317 p1 = namespace.getPrefix();
|
|
318 u1 = namespace.getURI();
|
|
319 p2 = other.getPrefix();
|
|
320 u2 = other.getURI();
|
|
321 if (p1.equals(p2) && !u1.equals(u2)) {
|
|
322 reason = "The namespace prefix \"" + p1 + "\" collides";
|
|
323 }
|
|
324 return reason;
|
|
325 }
|
|
326
|
|
327 /**
|
|
328 * Check if <code>{@link Attribute}</code>'s namespace collides with a
|
|
329 * <code>{@link Element}</code>'s namespace.
|
|
330 *
|
|
331 * @param attribute <code>Attribute</code> to check.
|
|
332 * @param element <code>Element</code> to check against.
|
|
333 * @return <code>String</code> reason for collision, or
|
|
334 * <code>null</code> if no collision.
|
|
335 */
|
|
336 public static String checkNamespaceCollision(Attribute attribute,
|
|
337 Element element) {
|
|
338 Namespace namespace = attribute.getNamespace();
|
|
339 String prefix = namespace.getPrefix();
|
|
340 if ("".equals(prefix)) {
|
|
341 return null;
|
|
342 }
|
|
343
|
|
344 return checkNamespaceCollision(namespace, element);
|
|
345 }
|
|
346
|
|
347 /**
|
|
348 * Check if a <code>{@link Namespace}</code> collides with a
|
|
349 * <code>{@link Element}</code>'s namespace.
|
|
350 *
|
|
351 * @param namespace <code>Namespace</code> to check.
|
|
352 * @param element <code>Element</code> to check against.
|
|
353 * @return <code>String</code> reason for collision, or
|
|
354 * <code>null</code> if no collision.
|
|
355 */
|
|
356 public static String checkNamespaceCollision(Namespace namespace,
|
|
357 Element element) {
|
|
358 String reason = checkNamespaceCollision(namespace,
|
|
359 element.getNamespace());
|
|
360 if (reason != null) {
|
|
361 return reason + " with the element namespace prefix";
|
|
362 }
|
|
363
|
|
364 reason = checkNamespaceCollision(namespace,
|
|
365 element.getAdditionalNamespaces());
|
|
366 if (reason != null) {
|
|
367 return reason;
|
|
368 }
|
|
369
|
|
370 reason = checkNamespaceCollision(namespace, element.getAttributes());
|
|
371 if (reason != null) {
|
|
372 return reason;
|
|
373 }
|
|
374
|
|
375 return null;
|
|
376 }
|
|
377
|
|
378 /**
|
|
379 * Check if a <code>{@link Namespace}</code> collides with a
|
|
380 * <code>{@link Attribute}</code>'s namespace.
|
|
381 *
|
|
382 * @param namespace <code>Namespace</code> to check.
|
|
383 * @param attribute <code>Attribute</code> to check against.
|
|
384 * @return <code>String</code> reason for collision, or
|
|
385 * <code>null</code> if no collision.
|
|
386 */
|
|
387 public static String checkNamespaceCollision(Namespace namespace,
|
|
388 Attribute attribute) {
|
|
389 String reason = null;
|
|
390 if (!attribute.getNamespace().equals(Namespace.NO_NAMESPACE)) {
|
|
391 reason = checkNamespaceCollision(namespace,
|
|
392 attribute.getNamespace());
|
|
393 if (reason != null) {
|
|
394 reason += " with an attribute namespace prefix on the element";
|
|
395 }
|
|
396 }
|
|
397 return reason;
|
|
398 }
|
|
399
|
|
400 /**
|
|
401 * Check if a <code>{@link Namespace}</code> collides with any namespace
|
|
402 * from a list of objects.
|
|
403 *
|
|
404 * @param namespace <code>Namespace</code> to check.
|
|
405 * @param list <code>List</code> to check against.
|
|
406 * @return <code>String</code> reason for collision, or
|
|
407 * <code>null</code> if no collision.
|
|
408 */
|
|
409 public static String checkNamespaceCollision(Namespace namespace,
|
|
410 List list) {
|
|
411 if (list == null) {
|
|
412 return null;
|
|
413 }
|
|
414
|
|
415 String reason = null;
|
|
416 Iterator i = list.iterator();
|
|
417 while ((reason == null) && i.hasNext()) {
|
|
418 Object obj = i.next();
|
|
419 if (obj instanceof Attribute) {
|
|
420 reason = checkNamespaceCollision(namespace, (Attribute) obj);
|
|
421 }
|
|
422 else if (obj instanceof Element) {
|
|
423 reason = checkNamespaceCollision(namespace, (Element) obj);
|
|
424 }
|
|
425 else if (obj instanceof Namespace) {
|
|
426 reason = checkNamespaceCollision(namespace, (Namespace) obj);
|
|
427 if (reason != null) {
|
|
428 reason += " with an additional namespace declared" +
|
|
429 " by the element";
|
|
430 }
|
|
431 }
|
|
432 }
|
|
433 return reason;
|
|
434 }
|
|
435
|
|
436 /**
|
|
437 * This will check the supplied data to see if it is legal for use as
|
|
438 * a JDOM <code>{@link ProcessingInstruction}</code> target.
|
|
439 *
|
|
440 * @param target <code>String</code> target to check.
|
|
441 * @return <code>String</code> reason target is illegal, or
|
|
442 * <code>null</code> if target is OK.
|
|
443 */
|
|
444 public static String checkProcessingInstructionTarget(String target) {
|
|
445 // Check basic XML name rules first
|
|
446 String reason;
|
|
447 if ((reason = checkXMLName(target)) != null) {
|
|
448 return reason;
|
|
449 }
|
|
450
|
|
451 // No colons allowed, per Namespace Specification Section 6
|
|
452 if (target.indexOf(":") != -1) {
|
|
453 return "Processing instruction targets cannot contain colons";
|
|
454 }
|
|
455
|
|
456 // Cannot begin with 'xml' in any case
|
|
457 if (target.equalsIgnoreCase("xml")) {
|
|
458 return "Processing instructions cannot have a target of " +
|
|
459 "\"xml\" in any combination of case. (Note that the " +
|
|
460 "\"<?xml ... ?>\" declaration at the beginning of a " +
|
|
461 "document is not a processing instruction and should not " +
|
|
462 "be added as one; it is written automatically during " +
|
|
463 "output, e.g. by XMLOutputter.)";
|
|
464 }
|
|
465
|
|
466 // If we got here, everything is OK
|
|
467 return null;
|
|
468 }
|
|
469
|
|
470 /**
|
|
471 * This will check the supplied data to see if it is legal for use as
|
|
472 * <code>{@link ProcessingInstruction}</code> data. Besides checking that
|
|
473 * all the characters are allowed in XML, this also checks
|
|
474 * that the data does not contain the PI end-string "?>".
|
|
475 *
|
|
476 * @param data <code>String</code> data to check.
|
|
477 * @return <code>String</code> reason data is illegal, or
|
|
478 * <code>null</code> if data is OK.
|
|
479 */
|
|
480 public static String checkProcessingInstructionData(String data) {
|
|
481 // Check basic XML name rules first
|
|
482 String reason = checkCharacterData(data);
|
|
483
|
|
484 if (reason == null) {
|
|
485 if (data.indexOf("?>") >= 0) {
|
|
486 return "Processing instructions cannot contain " +
|
|
487 "the string \"?>\"";
|
|
488 }
|
|
489 }
|
|
490
|
|
491 return reason;
|
|
492 }
|
|
493
|
|
494 /**
|
|
495 * This will check the supplied data to see if it is legal for use as
|
|
496 * JDOM <code>{@link Comment}</code> data.
|
|
497 *
|
|
498 * @param data <code>String</code> data to check.
|
|
499 * @return <code>String</code> reason data is illegal, or
|
|
500 * <code>null</code> if data is OK.
|
|
501 */
|
|
502 public static String checkCommentData(String data) {
|
|
503 String reason = null;
|
|
504 if ((reason = checkCharacterData(data)) != null) {
|
|
505 return reason;
|
|
506 }
|
|
507
|
|
508 if (data.indexOf("--") != -1) {
|
|
509 return "Comments cannot contain double hyphens (--)";
|
|
510 }
|
|
511 if (data.endsWith("-")) {
|
|
512 return "Comment data cannot end with a hyphen.";
|
|
513 }
|
|
514
|
|
515 // If we got here, everything is OK
|
|
516 return null;
|
|
517 }
|
|
518 /**
|
|
519 * This is a utility function to decode a non-BMP
|
|
520 * UTF-16 surrogate pair.
|
|
521 * @param high high 16 bits
|
|
522 * @param low low 16 bits
|
|
523 * @return decoded character
|
|
524 */
|
|
525 public static int decodeSurrogatePair(char high, char low) {
|
|
526 return 0x10000 + (high - 0xD800) * 0x400 + (low - 0xDC00);
|
|
527 }
|
|
528
|
|
529 // [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] |
|
|
530 // [-'()+,./:=?;*#@$_%]
|
|
531 public static boolean isXMLPublicIDCharacter(char c) {
|
|
532
|
|
533 if (c >= 'a' && c <= 'z') return true;
|
|
534 if (c >= '?' && c <= 'Z') return true;
|
|
535 if (c >= '\'' && c <= ';') return true;
|
|
536
|
|
537 if (c == ' ') return true;
|
|
538 if (c == '!') return true;
|
|
539 if (c == '=') return true;
|
|
540 if (c == '#') return true;
|
|
541 if (c == '$') return true;
|
|
542 if (c == '_') return true;
|
|
543 if (c == '%') return true;
|
|
544 if (c == '\n') return true;
|
|
545 if (c == '\r') return true;
|
|
546 if (c == '\t') return true;
|
|
547
|
|
548 return false;
|
|
549 }
|
|
550
|
|
551 /**
|
|
552 * This will ensure that the data for a public identifier
|
|
553 * is legal.
|
|
554 *
|
|
555 * @param publicID <code>String</code> public ID to check.
|
|
556 * @return <code>String</code> reason public ID is illegal, or
|
|
557 * <code>null</code> if public ID is OK.
|
|
558 */
|
|
559 public static String checkPublicID(String publicID) {
|
|
560 String reason = null;
|
|
561
|
|
562 if (publicID == null) return null;
|
|
563 // This indicates there is no public ID
|
|
564
|
|
565 for (int i = 0; i < publicID.length(); i++) {
|
|
566 char c = publicID.charAt(i);
|
|
567 if (!isXMLPublicIDCharacter(c)) {
|
|
568 reason = c + " is not a legal character in public IDs";
|
|
569 break;
|
|
570 }
|
|
571 }
|
|
572
|
|
573 return reason;
|
|
574 }
|
|
575
|
|
576
|
|
577 /**
|
|
578 * This will ensure that the data for a system literal
|
|
579 * is legal.
|
|
580 *
|
|
581 * @param systemLiteral <code>String</code> system literal to check.
|
|
582 * @return <code>String</code> reason system literal is illegal, or
|
|
583 * <code>null</code> if system literal is OK.
|
|
584 */
|
|
585 public static String checkSystemLiteral(String systemLiteral) {
|
|
586 String reason = null;
|
|
587
|
|
588 if (systemLiteral == null) return null;
|
|
589 // This indicates there is no system ID
|
|
590
|
|
591 if (systemLiteral.indexOf('\'') != -1
|
|
592 && systemLiteral.indexOf('"') != -1) {
|
|
593 reason =
|
|
594 "System literals cannot simultaneously contain both single and double quotes.";
|
|
595 }
|
|
596 else {
|
|
597 reason = checkCharacterData(systemLiteral);
|
|
598 }
|
|
599
|
|
600 return reason;
|
|
601 }
|
|
602
|
|
603 /**
|
|
604 * This is a utility function for sharing the base process of checking
|
|
605 * any XML name.
|
|
606 *
|
|
607 * @param name <code>String</code> to check for XML name compliance.
|
|
608 * @return <code>String</code> reason the name is illegal, or
|
|
609 * <code>null</code> if OK.
|
|
610 */
|
|
611 public static String checkXMLName(String name) {
|
|
612 // Cannot be empty or null
|
|
613 if ((name == null) || (name.length() == 0)
|
|
614 || (name.trim().equals(""))) {
|
|
615 return "XML names cannot be null or empty";
|
|
616 }
|
|
617
|
|
618
|
|
619 // Cannot start with a number
|
|
620 char first = name.charAt(0);
|
|
621 if (!isXMLNameStartCharacter(first)) {
|
|
622 return "XML names cannot begin with the character \"" +
|
|
623 first + "\"";
|
|
624 }
|
|
625 // Ensure legal content for non-first chars
|
|
626 for (int i=1, len = name.length(); i<len; i++) {
|
|
627 char c = name.charAt(i);
|
|
628 if (!isXMLNameCharacter(c)) {
|
|
629 return "XML names cannot contain the character \"" + c + "\"";
|
|
630 }
|
|
631 }
|
|
632
|
|
633 // We got here, so everything is OK
|
|
634 return null;
|
|
635 }
|
|
636
|
|
637 /**
|
|
638 * <p>
|
|
639 * Checks a string to see if it is a legal RFC 2396 URI.
|
|
640 * Both absolute and relative URIs are supported.
|
|
641 * </p>
|
|
642 *
|
|
643 * @param uri <code>String</code> to check.
|
|
644 * @return <code>String</code> reason the URI is illegal, or
|
|
645 * <code>null</code> if OK.
|
|
646 */
|
|
647 public static String checkURI(String uri) {
|
|
648 // URIs can be null or empty
|
|
649 if ((uri == null) || (uri.equals(""))) {
|
|
650 return null;
|
|
651 }
|
|
652
|
|
653 for (int i = 0; i < uri.length(); i++) {
|
|
654 char test = uri.charAt(i);
|
|
655 if (!isURICharacter(test)) {
|
|
656 String msgNumber = "0x" + Integer.toHexString(test);
|
|
657 if (test <= 0x09) msgNumber = "0x0" + Integer.toHexString(test);
|
|
658 return "URIs cannot contain " + msgNumber;
|
|
659 } // end if
|
|
660 if (test == '%') { // must be followed by two hexadecimal digits
|
|
661 try {
|
|
662 char firstDigit = uri.charAt(i+1);
|
|
663 char secondDigit = uri.charAt(i+2);
|
|
664 if (!isHexDigit(firstDigit) ||
|
|
665 !isHexDigit(secondDigit)) {
|
|
666 return "Percent signs in URIs must be followed by "
|
|
667 + "exactly two hexadecimal digits.";
|
|
668 }
|
|
669
|
|
670 }
|
|
671 catch (StringIndexOutOfBoundsException e) {
|
|
672 return "Percent signs in URIs must be followed by "
|
|
673 + "exactly two hexadecimal digits.";
|
|
674 }
|
|
675 }
|
|
676 } // end for
|
|
677
|
|
678 // If we got here, everything is OK
|
|
679 return null;
|
|
680 }
|
|
681
|
|
682 /**
|
|
683 * <p>
|
|
684 * This is a utility function for determining whether a specified
|
|
685 * Unicode character is a hexadecimal digit as defined in RFC 2396;
|
|
686 * that is, one of the ASCII characters 0-9, a-f, or A-F.
|
|
687 * </p>
|
|
688 *
|
|
689 * @param c to check for hex digit.
|
|
690 * @return true if it's allowed, false otherwise.
|
|
691 */
|
|
692 public static boolean isHexDigit(char c) {
|
|
693
|
|
694 // I suspect most characters passed to this method will be
|
|
695 // correct hexadecimal digits, so I test for the true cases
|
|
696 // first. If this proves to be a performance bottleneck
|
|
697 // a switch statement or lookup table
|
|
698 // might optimize this.
|
|
699 if (c >= '0' && c <= '9') return true;
|
|
700 if (c >= 'A' && c <= 'F') return true;
|
|
701 if (c >= 'a' && c <= 'f') return true;
|
|
702
|
|
703 return false;
|
|
704 }
|
|
705
|
|
706 /**
|
|
707 * This is a function for determining whether the
|
|
708 * specified character is the high 16 bits in a
|
|
709 * UTF-16 surrogate pair.
|
|
710 * @param ch character to check
|
|
711 * @return true if the character is a high surrogate, false otherwise
|
|
712 */
|
|
713 public static boolean isHighSurrogate(char ch) {
|
|
714 return (ch >= 0xD800 && ch <= 0xDBFF);
|
|
715 }
|
|
716
|
|
717 /**
|
|
718 * This is a function for determining whether the
|
|
719 * specified character is the low 16 bits in a
|
|
720 * UTF-16 surrogate pair.
|
|
721 * @param ch character to check
|
|
722 * @return true if the character is a low surrogate, false otherwise.
|
|
723 */
|
|
724 public static boolean isLowSurrogate(char ch) {
|
|
725 return (ch >= 0xDC00 && ch <= 0xDFFF);
|
|
726 }
|
|
727
|
|
728 /**
|
|
729 * <p>
|
|
730 * This is a utility function for determining whether
|
|
731 * a specified Unicode character is legal in URI references
|
|
732 * as determined by RFC 2396.
|
|
733 * </p>
|
|
734 *
|
|
735 * @param c <code>char</code> to check for URI reference compliance.
|
|
736 * @return true if it's allowed, false otherwise.
|
|
737 */
|
|
738 public static boolean isURICharacter(char c) {
|
|
739 if (c >= 'a' && c <= 'z') return true;
|
|
740 if (c >= 'A' && c <= 'Z') return true;
|
|
741 if (c >= '0' && c <= '9') return true;
|
|
742 if (c == '/') return true;
|
|
743 if (c == '-') return true;
|
|
744 if (c == '.') return true;
|
|
745 if (c == '?') return true;
|
|
746 if (c == ':') return true;
|
|
747 if (c == '@') return true;
|
|
748 if (c == '&') return true;
|
|
749 if (c == '=') return true;
|
|
750 if (c == '+') return true;
|
|
751 if (c == '$') return true;
|
|
752 if (c == ',') return true;
|
|
753 if (c == '%') return true;
|
|
754
|
|
755 if (c == '_') return true;
|
|
756 if (c == '!') return true;
|
|
757 if (c == '~') return true;
|
|
758 if (c == '*') return true;
|
|
759 if (c == '\'') return true;
|
|
760 if (c == '(') return true;
|
|
761 if (c == ')') return true;
|
|
762 return false;
|
|
763 }
|
|
764
|
|
765 /**
|
|
766 * This is a utility function for determining whether a specified
|
|
767 * character is a character according to production 2 of the
|
|
768 * XML 1.0 specification.
|
|
769 *
|
|
770 * @param c <code>char</code> to check for XML compliance
|
|
771 * @return <code>boolean</code> true if it's a character,
|
|
772 * false otherwise
|
|
773 */
|
|
774 public static boolean isXMLCharacter(int c) {
|
|
775
|
|
776 if (c == '\n') return true;
|
|
777 if (c == '\r') return true;
|
|
778 if (c == '\t') return true;
|
|
779
|
|
780 if (c < 0x20) return false; if (c <= 0xD7FF) return true;
|
|
781 if (c < 0xE000) return false; if (c <= 0xFFFD) return true;
|
|
782 if (c < 0x10000) return false; if (c <= 0x10FFFF) return true;
|
|
783
|
|
784 return false;
|
|
785 }
|
|
786
|
|
787
|
|
788 /**
|
|
789 * This is a utility function for determining whether a specified
|
|
790 * character is a name character according to production 4 of the
|
|
791 * XML 1.0 specification.
|
|
792 *
|
|
793 * @param c <code>char</code> to check for XML name compliance.
|
|
794 * @return <code>boolean</code> true if it's a name character,
|
|
795 * false otherwise.
|
|
796 */
|
|
797 public static boolean isXMLNameCharacter(char c) {
|
|
798
|
|
799 return (isXMLLetter(c) || isXMLDigit(c) || c == '.' || c == '-'
|
|
800 || c == '_' || c == ':' || isXMLCombiningChar(c)
|
|
801 || isXMLExtender(c));
|
|
802 }
|
|
803
|
|
804 /**
|
|
805 * This is a utility function for determining whether a specified
|
|
806 * character is a legal name start character according to production 5
|
|
807 * of the XML 1.0 specification. This production does allow names
|
|
808 * to begin with colons which the Namespaces in XML Recommendation
|
|
809 * disallows.
|
|
810 *
|
|
811 * @param c <code>char</code> to check for XML name start compliance.
|
|
812 * @return <code>boolean</code> true if it's a name start character,
|
|
813 * false otherwise.
|
|
814 */
|
|
815 public static boolean isXMLNameStartCharacter(char c) {
|
|
816
|
|
817 return (isXMLLetter(c) || c == '_' || c ==':');
|
|
818
|
|
819 }
|
|
820
|
|
821 /**
|
|
822 * This is a utility function for determining whether a specified
|
|
823 * character is a letter or digit according to productions 84 and 88
|
|
824 * of the XML 1.0 specification.
|
|
825 *
|
|
826 * @param c <code>char</code> to check.
|
|
827 * @return <code>boolean</code> true if it's letter or digit,
|
|
828 * false otherwise.
|
|
829 */
|
|
830 public static boolean isXMLLetterOrDigit(char c) {
|
|
831
|
|
832 return (isXMLLetter(c) || isXMLDigit(c));
|
|
833
|
|
834 }
|
|
835
|
|
836 /**
|
|
837 * This is a utility function for determining whether a specified character
|
|
838 * is a letter according to production 84 of the XML 1.0 specification.
|
|
839 *
|
|
840 * @param c <code>char</code> to check for XML name compliance.
|
|
841 * @return <code>boolean</code> true if it's a letter, false otherwise.
|
|
842 */
|
|
843 public static boolean isXMLLetter(char c) {
|
|
844 // Note that order is very important here. The search proceeds
|
|
845 // from lowest to highest values, so that no searching occurs
|
|
846 // above the character's value. BTW, the first line is equivalent to:
|
|
847 // if (c >= 0x0041 && c <= 0x005A) return true;
|
|
848
|
|
849 if (c < 0x0041) return false; if (c <= 0x005a) return true;
|
|
850 if (c < 0x0061) return false; if (c <= 0x007A) return true;
|
|
851 if (c < 0x00C0) return false; if (c <= 0x00D6) return true;
|
|
852 if (c < 0x00D8) return false; if (c <= 0x00F6) return true;
|
|
853 if (c < 0x00F8) return false; if (c <= 0x00FF) return true;
|
|
854 if (c < 0x0100) return false; if (c <= 0x0131) return true;
|
|
855 if (c < 0x0134) return false; if (c <= 0x013E) return true;
|
|
856 if (c < 0x0141) return false; if (c <= 0x0148) return true;
|
|
857 if (c < 0x014A) return false; if (c <= 0x017E) return true;
|
|
858 if (c < 0x0180) return false; if (c <= 0x01C3) return true;
|
|
859 if (c < 0x01CD) return false; if (c <= 0x01F0) return true;
|
|
860 if (c < 0x01F4) return false; if (c <= 0x01F5) return true;
|
|
861 if (c < 0x01FA) return false; if (c <= 0x0217) return true;
|
|
862 if (c < 0x0250) return false; if (c <= 0x02A8) return true;
|
|
863 if (c < 0x02BB) return false; if (c <= 0x02C1) return true;
|
|
864 if (c == 0x0386) return true;
|
|
865 if (c < 0x0388) return false; if (c <= 0x038A) return true;
|
|
866 if (c == 0x038C) return true;
|
|
867 if (c < 0x038E) return false; if (c <= 0x03A1) return true;
|
|
868 if (c < 0x03A3) return false; if (c <= 0x03CE) return true;
|
|
869 if (c < 0x03D0) return false; if (c <= 0x03D6) return true;
|
|
870 if (c == 0x03DA) return true;
|
|
871 if (c == 0x03DC) return true;
|
|
872 if (c == 0x03DE) return true;
|
|
873 if (c == 0x03E0) return true;
|
|
874 if (c < 0x03E2) return false; if (c <= 0x03F3) return true;
|
|
875 if (c < 0x0401) return false; if (c <= 0x040C) return true;
|
|
876 if (c < 0x040E) return false; if (c <= 0x044F) return true;
|
|
877 if (c < 0x0451) return false; if (c <= 0x045C) return true;
|
|
878 if (c < 0x045E) return false; if (c <= 0x0481) return true;
|
|
879 if (c < 0x0490) return false; if (c <= 0x04C4) return true;
|
|
880 if (c < 0x04C7) return false; if (c <= 0x04C8) return true;
|
|
881 if (c < 0x04CB) return false; if (c <= 0x04CC) return true;
|
|
882 if (c < 0x04D0) return false; if (c <= 0x04EB) return true;
|
|
883 if (c < 0x04EE) return false; if (c <= 0x04F5) return true;
|
|
884 if (c < 0x04F8) return false; if (c <= 0x04F9) return true;
|
|
885 if (c < 0x0531) return false; if (c <= 0x0556) return true;
|
|
886 if (c == 0x0559) return true;
|
|
887 if (c < 0x0561) return false; if (c <= 0x0586) return true;
|
|
888 if (c < 0x05D0) return false; if (c <= 0x05EA) return true;
|
|
889 if (c < 0x05F0) return false; if (c <= 0x05F2) return true;
|
|
890 if (c < 0x0621) return false; if (c <= 0x063A) return true;
|
|
891 if (c < 0x0641) return false; if (c <= 0x064A) return true;
|
|
892 if (c < 0x0671) return false; if (c <= 0x06B7) return true;
|
|
893 if (c < 0x06BA) return false; if (c <= 0x06BE) return true;
|
|
894 if (c < 0x06C0) return false; if (c <= 0x06CE) return true;
|
|
895 if (c < 0x06D0) return false; if (c <= 0x06D3) return true;
|
|
896 if (c == 0x06D5) return true;
|
|
897 if (c < 0x06E5) return false; if (c <= 0x06E6) return true;
|
|
898 if (c < 0x0905) return false; if (c <= 0x0939) return true;
|
|
899 if (c == 0x093D) return true;
|
|
900 if (c < 0x0958) return false; if (c <= 0x0961) return true;
|
|
901 if (c < 0x0985) return false; if (c <= 0x098C) return true;
|
|
902 if (c < 0x098F) return false; if (c <= 0x0990) return true;
|
|
903 if (c < 0x0993) return false; if (c <= 0x09A8) return true;
|
|
904 if (c < 0x09AA) return false; if (c <= 0x09B0) return true;
|
|
905 if (c == 0x09B2) return true;
|
|
906 if (c < 0x09B6) return false; if (c <= 0x09B9) return true;
|
|
907 if (c < 0x09DC) return false; if (c <= 0x09DD) return true;
|
|
908 if (c < 0x09DF) return false; if (c <= 0x09E1) return true;
|
|
909 if (c < 0x09F0) return false; if (c <= 0x09F1) return true;
|
|
910 if (c < 0x0A05) return false; if (c <= 0x0A0A) return true;
|
|
911 if (c < 0x0A0F) return false; if (c <= 0x0A10) return true;
|
|
912 if (c < 0x0A13) return false; if (c <= 0x0A28) return true;
|
|
913 if (c < 0x0A2A) return false; if (c <= 0x0A30) return true;
|
|
914 if (c < 0x0A32) return false; if (c <= 0x0A33) return true;
|
|
915 if (c < 0x0A35) return false; if (c <= 0x0A36) return true;
|
|
916 if (c < 0x0A38) return false; if (c <= 0x0A39) return true;
|
|
917 if (c < 0x0A59) return false; if (c <= 0x0A5C) return true;
|
|
918 if (c == 0x0A5E) return true;
|
|
919 if (c < 0x0A72) return false; if (c <= 0x0A74) return true;
|
|
920 if (c < 0x0A85) return false; if (c <= 0x0A8B) return true;
|
|
921 if (c == 0x0A8D) return true;
|
|
922 if (c < 0x0A8F) return false; if (c <= 0x0A91) return true;
|
|
923 if (c < 0x0A93) return false; if (c <= 0x0AA8) return true;
|
|
924 if (c < 0x0AAA) return false; if (c <= 0x0AB0) return true;
|
|
925 if (c < 0x0AB2) return false; if (c <= 0x0AB3) return true;
|
|
926 if (c < 0x0AB5) return false; if (c <= 0x0AB9) return true;
|
|
927 if (c == 0x0ABD) return true;
|
|
928 if (c == 0x0AE0) return true;
|
|
929 if (c < 0x0B05) return false; if (c <= 0x0B0C) return true;
|
|
930 if (c < 0x0B0F) return false; if (c <= 0x0B10) return true;
|
|
931 if (c < 0x0B13) return false; if (c <= 0x0B28) return true;
|
|
932 if (c < 0x0B2A) return false; if (c <= 0x0B30) return true;
|
|
933 if (c < 0x0B32) return false; if (c <= 0x0B33) return true;
|
|
934 if (c < 0x0B36) return false; if (c <= 0x0B39) return true;
|
|
935 if (c == 0x0B3D) return true;
|
|
936 if (c < 0x0B5C) return false; if (c <= 0x0B5D) return true;
|
|
937 if (c < 0x0B5F) return false; if (c <= 0x0B61) return true;
|
|
938 if (c < 0x0B85) return false; if (c <= 0x0B8A) return true;
|
|
939 if (c < 0x0B8E) return false; if (c <= 0x0B90) return true;
|
|
940 if (c < 0x0B92) return false; if (c <= 0x0B95) return true;
|
|
941 if (c < 0x0B99) return false; if (c <= 0x0B9A) return true;
|
|
942 if (c == 0x0B9C) return true;
|
|
943 if (c < 0x0B9E) return false; if (c <= 0x0B9F) return true;
|
|
944 if (c < 0x0BA3) return false; if (c <= 0x0BA4) return true;
|
|
945 if (c < 0x0BA8) return false; if (c <= 0x0BAA) return true;
|
|
946 if (c < 0x0BAE) return false; if (c <= 0x0BB5) return true;
|
|
947 if (c < 0x0BB7) return false; if (c <= 0x0BB9) return true;
|
|
948 if (c < 0x0C05) return false; if (c <= 0x0C0C) return true;
|
|
949 if (c < 0x0C0E) return false; if (c <= 0x0C10) return true;
|
|
950 if (c < 0x0C12) return false; if (c <= 0x0C28) return true;
|
|
951 if (c < 0x0C2A) return false; if (c <= 0x0C33) return true;
|
|
952 if (c < 0x0C35) return false; if (c <= 0x0C39) return true;
|
|
953 if (c < 0x0C60) return false; if (c <= 0x0C61) return true;
|
|
954 if (c < 0x0C85) return false; if (c <= 0x0C8C) return true;
|
|
955 if (c < 0x0C8E) return false; if (c <= 0x0C90) return true;
|
|
956 if (c < 0x0C92) return false; if (c <= 0x0CA8) return true;
|
|
957 if (c < 0x0CAA) return false; if (c <= 0x0CB3) return true;
|
|
958 if (c < 0x0CB5) return false; if (c <= 0x0CB9) return true;
|
|
959 if (c == 0x0CDE) return true;
|
|
960 if (c < 0x0CE0) return false; if (c <= 0x0CE1) return true;
|
|
961 if (c < 0x0D05) return false; if (c <= 0x0D0C) return true;
|
|
962 if (c < 0x0D0E) return false; if (c <= 0x0D10) return true;
|
|
963 if (c < 0x0D12) return false; if (c <= 0x0D28) return true;
|
|
964 if (c < 0x0D2A) return false; if (c <= 0x0D39) return true;
|
|
965 if (c < 0x0D60) return false; if (c <= 0x0D61) return true;
|
|
966 if (c < 0x0E01) return false; if (c <= 0x0E2E) return true;
|
|
967 if (c == 0x0E30) return true;
|
|
968 if (c < 0x0E32) return false; if (c <= 0x0E33) return true;
|
|
969 if (c < 0x0E40) return false; if (c <= 0x0E45) return true;
|
|
970 if (c < 0x0E81) return false; if (c <= 0x0E82) return true;
|
|
971 if (c == 0x0E84) return true;
|
|
972 if (c < 0x0E87) return false; if (c <= 0x0E88) return true;
|
|
973 if (c == 0x0E8A) return true;
|
|
974 if (c == 0x0E8D) return true;
|
|
975 if (c < 0x0E94) return false; if (c <= 0x0E97) return true;
|
|
976 if (c < 0x0E99) return false; if (c <= 0x0E9F) return true;
|
|
977 if (c < 0x0EA1) return false; if (c <= 0x0EA3) return true;
|
|
978 if (c == 0x0EA5) return true;
|
|
979 if (c == 0x0EA7) return true;
|
|
980 if (c < 0x0EAA) return false; if (c <= 0x0EAB) return true;
|
|
981 if (c < 0x0EAD) return false; if (c <= 0x0EAE) return true;
|
|
982 if (c == 0x0EB0) return true;
|
|
983 if (c < 0x0EB2) return false; if (c <= 0x0EB3) return true;
|
|
984 if (c == 0x0EBD) return true;
|
|
985 if (c < 0x0EC0) return false; if (c <= 0x0EC4) return true;
|
|
986 if (c < 0x0F40) return false; if (c <= 0x0F47) return true;
|
|
987 if (c < 0x0F49) return false; if (c <= 0x0F69) return true;
|
|
988 if (c < 0x10A0) return false; if (c <= 0x10C5) return true;
|
|
989 if (c < 0x10D0) return false; if (c <= 0x10F6) return true;
|
|
990 if (c == 0x1100) return true;
|
|
991 if (c < 0x1102) return false; if (c <= 0x1103) return true;
|
|
992 if (c < 0x1105) return false; if (c <= 0x1107) return true;
|
|
993 if (c == 0x1109) return true;
|
|
994 if (c < 0x110B) return false; if (c <= 0x110C) return true;
|
|
995 if (c < 0x110E) return false; if (c <= 0x1112) return true;
|
|
996 if (c == 0x113C) return true;
|
|
997 if (c == 0x113E) return true;
|
|
998 if (c == 0x1140) return true;
|
|
999 if (c == 0x114C) return true;
|
|
1000 if (c == 0x114E) return true;
|
|
1001 if (c == 0x1150) return true;
|
|
1002 if (c < 0x1154) return false; if (c <= 0x1155) return true;
|
|
1003 if (c == 0x1159) return true;
|
|
1004 if (c < 0x115F) return false; if (c <= 0x1161) return true;
|
|
1005 if (c == 0x1163) return true;
|
|
1006 if (c == 0x1165) return true;
|
|
1007 if (c == 0x1167) return true;
|
|
1008 if (c == 0x1169) return true;
|
|
1009 if (c < 0x116D) return false; if (c <= 0x116E) return true;
|
|
1010 if (c < 0x1172) return false; if (c <= 0x1173) return true;
|
|
1011 if (c == 0x1175) return true;
|
|
1012 if (c == 0x119E) return true;
|
|
1013 if (c == 0x11A8) return true;
|
|
1014 if (c == 0x11AB) return true;
|
|
1015 if (c < 0x11AE) return false; if (c <= 0x11AF) return true;
|
|
1016 if (c < 0x11B7) return false; if (c <= 0x11B8) return true;
|
|
1017 if (c == 0x11BA) return true;
|
|
1018 if (c < 0x11BC) return false; if (c <= 0x11C2) return true;
|
|
1019 if (c == 0x11EB) return true;
|
|
1020 if (c == 0x11F0) return true;
|
|
1021 if (c == 0x11F9) return true;
|
|
1022 if (c < 0x1E00) return false; if (c <= 0x1E9B) return true;
|
|
1023 if (c < 0x1EA0) return false; if (c <= 0x1EF9) return true;
|
|
1024 if (c < 0x1F00) return false; if (c <= 0x1F15) return true;
|
|
1025 if (c < 0x1F18) return false; if (c <= 0x1F1D) return true;
|
|
1026 if (c < 0x1F20) return false; if (c <= 0x1F45) return true;
|
|
1027 if (c < 0x1F48) return false; if (c <= 0x1F4D) return true;
|
|
1028 if (c < 0x1F50) return false; if (c <= 0x1F57) return true;
|
|
1029 if (c == 0x1F59) return true;
|
|
1030 if (c == 0x1F5B) return true;
|
|
1031 if (c == 0x1F5D) return true;
|
|
1032 if (c < 0x1F5F) return false; if (c <= 0x1F7D) return true;
|
|
1033 if (c < 0x1F80) return false; if (c <= 0x1FB4) return true;
|
|
1034 if (c < 0x1FB6) return false; if (c <= 0x1FBC) return true;
|
|
1035 if (c == 0x1FBE) return true;
|
|
1036 if (c < 0x1FC2) return false; if (c <= 0x1FC4) return true;
|
|
1037 if (c < 0x1FC6) return false; if (c <= 0x1FCC) return true;
|
|
1038 if (c < 0x1FD0) return false; if (c <= 0x1FD3) return true;
|
|
1039 if (c < 0x1FD6) return false; if (c <= 0x1FDB) return true;
|
|
1040 if (c < 0x1FE0) return false; if (c <= 0x1FEC) return true;
|
|
1041 if (c < 0x1FF2) return false; if (c <= 0x1FF4) return true;
|
|
1042 if (c < 0x1FF6) return false; if (c <= 0x1FFC) return true;
|
|
1043 if (c == 0x2126) return true;
|
|
1044 if (c < 0x212A) return false; if (c <= 0x212B) return true;
|
|
1045 if (c == 0x212E) return true;
|
|
1046 if (c < 0x2180) return false; if (c <= 0x2182) return true;
|
|
1047 if (c == 0x3007) return true; // ideographic
|
|
1048 if (c < 0x3021) return false; if (c <= 0x3029) return true; // ideo
|
|
1049 if (c < 0x3041) return false; if (c <= 0x3094) return true;
|
|
1050 if (c < 0x30A1) return false; if (c <= 0x30FA) return true;
|
|
1051 if (c < 0x3105) return false; if (c <= 0x312C) return true;
|
|
1052 if (c < 0x4E00) return false; if (c <= 0x9FA5) return true; // ideo
|
|
1053 if (c < 0xAC00) return false; if (c <= 0xD7A3) return true;
|
|
1054
|
|
1055 return false;
|
|
1056
|
|
1057 }
|
|
1058
|
|
1059 /**
|
|
1060 * This is a utility function for determining whether a specified character
|
|
1061 * is a combining character according to production 87
|
|
1062 * of the XML 1.0 specification.
|
|
1063 *
|
|
1064 * @param c <code>char</code> to check.
|
|
1065 * @return <code>boolean</code> true if it's a combining character,
|
|
1066 * false otherwise.
|
|
1067 */
|
|
1068 public static boolean isXMLCombiningChar(char c) {
|
|
1069 // CombiningChar
|
|
1070 if (c < 0x0300) return false; if (c <= 0x0345) return true;
|
|
1071 if (c < 0x0360) return false; if (c <= 0x0361) return true;
|
|
1072 if (c < 0x0483) return false; if (c <= 0x0486) return true;
|
|
1073 if (c < 0x0591) return false; if (c <= 0x05A1) return true;
|
|
1074
|
|
1075 if (c < 0x05A3) return false; if (c <= 0x05B9) return true;
|
|
1076 if (c < 0x05BB) return false; if (c <= 0x05BD) return true;
|
|
1077 if (c == 0x05BF) return true;
|
|
1078 if (c < 0x05C1) return false; if (c <= 0x05C2) return true;
|
|
1079
|
|
1080 if (c == 0x05C4) return true;
|
|
1081 if (c < 0x064B) return false; if (c <= 0x0652) return true;
|
|
1082 if (c == 0x0670) return true;
|
|
1083 if (c < 0x06D6) return false; if (c <= 0x06DC) return true;
|
|
1084
|
|
1085 if (c < 0x06DD) return false; if (c <= 0x06DF) return true;
|
|
1086 if (c < 0x06E0) return false; if (c <= 0x06E4) return true;
|
|
1087 if (c < 0x06E7) return false; if (c <= 0x06E8) return true;
|
|
1088
|
|
1089 if (c < 0x06EA) return false; if (c <= 0x06ED) return true;
|
|
1090 if (c < 0x0901) return false; if (c <= 0x0903) return true;
|
|
1091 if (c == 0x093C) return true;
|
|
1092 if (c < 0x093E) return false; if (c <= 0x094C) return true;
|
|
1093
|
|
1094 if (c == 0x094D) return true;
|
|
1095 if (c < 0x0951) return false; if (c <= 0x0954) return true;
|
|
1096 if (c < 0x0962) return false; if (c <= 0x0963) return true;
|
|
1097 if (c < 0x0981) return false; if (c <= 0x0983) return true;
|
|
1098
|
|
1099 if (c == 0x09BC) return true;
|
|
1100 if (c == 0x09BE) return true;
|
|
1101 if (c == 0x09BF) return true;
|
|
1102 if (c < 0x09C0) return false; if (c <= 0x09C4) return true;
|
|
1103 if (c < 0x09C7) return false; if (c <= 0x09C8) return true;
|
|
1104
|
|
1105 if (c < 0x09CB) return false; if (c <= 0x09CD) return true;
|
|
1106 if (c == 0x09D7) return true;
|
|
1107 if (c < 0x09E2) return false; if (c <= 0x09E3) return true;
|
|
1108 if (c == 0x0A02) return true;
|
|
1109 if (c == 0x0A3C) return true;
|
|
1110
|
|
1111 if (c == 0x0A3E) return true;
|
|
1112 if (c == 0x0A3F) return true;
|
|
1113 if (c < 0x0A40) return false; if (c <= 0x0A42) return true;
|
|
1114 if (c < 0x0A47) return false; if (c <= 0x0A48) return true;
|
|
1115
|
|
1116 if (c < 0x0A4B) return false; if (c <= 0x0A4D) return true;
|
|
1117 if (c < 0x0A70) return false; if (c <= 0x0A71) return true;
|
|
1118 if (c < 0x0A81) return false; if (c <= 0x0A83) return true;
|
|
1119 if (c == 0x0ABC) return true;
|
|
1120
|
|
1121 if (c < 0x0ABE) return false; if (c <= 0x0AC5) return true;
|
|
1122 if (c < 0x0AC7) return false; if (c <= 0x0AC9) return true;
|
|
1123 if (c < 0x0ACB) return false; if (c <= 0x0ACD) return true;
|
|
1124
|
|
1125 if (c < 0x0B01) return false; if (c <= 0x0B03) return true;
|
|
1126 if (c == 0x0B3C) return true;
|
|
1127 if (c < 0x0B3E) return false; if (c <= 0x0B43) return true;
|
|
1128 if (c < 0x0B47) return false; if (c <= 0x0B48) return true;
|
|
1129
|
|
1130 if (c < 0x0B4B) return false; if (c <= 0x0B4D) return true;
|
|
1131 if (c < 0x0B56) return false; if (c <= 0x0B57) return true;
|
|
1132 if (c < 0x0B82) return false; if (c <= 0x0B83) return true;
|
|
1133
|
|
1134 if (c < 0x0BBE) return false; if (c <= 0x0BC2) return true;
|
|
1135 if (c < 0x0BC6) return false; if (c <= 0x0BC8) return true;
|
|
1136 if (c < 0x0BCA) return false; if (c <= 0x0BCD) return true;
|
|
1137 if (c == 0x0BD7) return true;
|
|
1138
|
|
1139 if (c < 0x0C01) return false; if (c <= 0x0C03) return true;
|
|
1140 if (c < 0x0C3E) return false; if (c <= 0x0C44) return true;
|
|
1141 if (c < 0x0C46) return false; if (c <= 0x0C48) return true;
|
|
1142
|
|
1143 if (c < 0x0C4A) return false; if (c <= 0x0C4D) return true;
|
|
1144 if (c < 0x0C55) return false; if (c <= 0x0C56) return true;
|
|
1145 if (c < 0x0C82) return false; if (c <= 0x0C83) return true;
|
|
1146
|
|
1147 if (c < 0x0CBE) return false; if (c <= 0x0CC4) return true;
|
|
1148 if (c < 0x0CC6) return false; if (c <= 0x0CC8) return true;
|
|
1149 if (c < 0x0CCA) return false; if (c <= 0x0CCD) return true;
|
|
1150
|
|
1151 if (c < 0x0CD5) return false; if (c <= 0x0CD6) return true;
|
|
1152 if (c < 0x0D02) return false; if (c <= 0x0D03) return true;
|
|
1153 if (c < 0x0D3E) return false; if (c <= 0x0D43) return true;
|
|
1154
|
|
1155 if (c < 0x0D46) return false; if (c <= 0x0D48) return true;
|
|
1156 if (c < 0x0D4A) return false; if (c <= 0x0D4D) return true;
|
|
1157 if (c == 0x0D57) return true;
|
|
1158 if (c == 0x0E31) return true;
|
|
1159
|
|
1160 if (c < 0x0E34) return false; if (c <= 0x0E3A) return true;
|
|
1161 if (c < 0x0E47) return false; if (c <= 0x0E4E) return true;
|
|
1162 if (c == 0x0EB1) return true;
|
|
1163 if (c < 0x0EB4) return false; if (c <= 0x0EB9) return true;
|
|
1164
|
|
1165 if (c < 0x0EBB) return false; if (c <= 0x0EBC) return true;
|
|
1166 if (c < 0x0EC8) return false; if (c <= 0x0ECD) return true;
|
|
1167 if (c < 0x0F18) return false; if (c <= 0x0F19) return true;
|
|
1168 if (c == 0x0F35) return true;
|
|
1169
|
|
1170 if (c == 0x0F37) return true;
|
|
1171 if (c == 0x0F39) return true;
|
|
1172 if (c == 0x0F3E) return true;
|
|
1173 if (c == 0x0F3F) return true;
|
|
1174 if (c < 0x0F71) return false; if (c <= 0x0F84) return true;
|
|
1175
|
|
1176 if (c < 0x0F86) return false; if (c <= 0x0F8B) return true;
|
|
1177 if (c < 0x0F90) return false; if (c <= 0x0F95) return true;
|
|
1178 if (c == 0x0F97) return true;
|
|
1179 if (c < 0x0F99) return false; if (c <= 0x0FAD) return true;
|
|
1180
|
|
1181 if (c < 0x0FB1) return false; if (c <= 0x0FB7) return true;
|
|
1182 if (c == 0x0FB9) return true;
|
|
1183 if (c < 0x20D0) return false; if (c <= 0x20DC) return true;
|
|
1184 if (c == 0x20E1) return true;
|
|
1185
|
|
1186 if (c < 0x302A) return false; if (c <= 0x302F) return true;
|
|
1187 if (c == 0x3099) return true;
|
|
1188 if (c == 0x309A) return true;
|
|
1189
|
|
1190 return false;
|
|
1191
|
|
1192 }
|
|
1193
|
|
1194 /**
|
|
1195 * This is a utility function for determining whether a specified
|
|
1196 * character is an extender according to production 88 of the XML 1.0
|
|
1197 * specification.
|
|
1198 *
|
|
1199 * @param c <code>char</code> to check.
|
|
1200 * @return <code>String</code> true if it's an extender, false otherwise.
|
|
1201 */
|
|
1202 public static boolean isXMLExtender(char c) {
|
|
1203
|
|
1204 if (c < 0x00B6) return false; // quick short circuit
|
|
1205
|
|
1206 // Extenders
|
|
1207 if (c == 0x00B7) return true;
|
|
1208 if (c == 0x02D0) return true;
|
|
1209 if (c == 0x02D1) return true;
|
|
1210 if (c == 0x0387) return true;
|
|
1211 if (c == 0x0640) return true;
|
|
1212 if (c == 0x0E46) return true;
|
|
1213 if (c == 0x0EC6) return true;
|
|
1214 if (c == 0x3005) return true;
|
|
1215
|
|
1216 if (c < 0x3031) return false; if (c <= 0x3035) return true;
|
|
1217 if (c < 0x309D) return false; if (c <= 0x309E) return true;
|
|
1218 if (c < 0x30FC) return false; if (c <= 0x30FE) return true;
|
|
1219
|
|
1220 return false;
|
|
1221
|
|
1222 }
|
|
1223
|
|
1224 /**
|
|
1225 * This is a utility function for determining whether a specified
|
|
1226 * Unicode character
|
|
1227 * is a digit according to production 88 of the XML 1.0 specification.
|
|
1228 *
|
|
1229 * @param c <code>char</code> to check for XML digit compliance
|
|
1230 * @return <code>boolean</code> true if it's a digit, false otherwise
|
|
1231 */
|
|
1232 public static boolean isXMLDigit(char c) {
|
|
1233
|
|
1234 if (c < 0x0030) return false; if (c <= 0x0039) return true;
|
|
1235 if (c < 0x0660) return false; if (c <= 0x0669) return true;
|
|
1236 if (c < 0x06F0) return false; if (c <= 0x06F9) return true;
|
|
1237 if (c < 0x0966) return false; if (c <= 0x096F) return true;
|
|
1238
|
|
1239 if (c < 0x09E6) return false; if (c <= 0x09EF) return true;
|
|
1240 if (c < 0x0A66) return false; if (c <= 0x0A6F) return true;
|
|
1241 if (c < 0x0AE6) return false; if (c <= 0x0AEF) return true;
|
|
1242
|
|
1243 if (c < 0x0B66) return false; if (c <= 0x0B6F) return true;
|
|
1244 if (c < 0x0BE7) return false; if (c <= 0x0BEF) return true;
|
|
1245 if (c < 0x0C66) return false; if (c <= 0x0C6F) return true;
|
|
1246
|
|
1247 if (c < 0x0CE6) return false; if (c <= 0x0CEF) return true;
|
|
1248 if (c < 0x0D66) return false; if (c <= 0x0D6F) return true;
|
|
1249 if (c < 0x0E50) return false; if (c <= 0x0E59) return true;
|
|
1250
|
|
1251 if (c < 0x0ED0) return false; if (c <= 0x0ED9) return true;
|
|
1252 if (c < 0x0F20) return false; if (c <= 0x0F29) return true;
|
|
1253
|
|
1254 return false;
|
|
1255 }
|
|
1256
|
|
1257 /**
|
|
1258 * This is a utility function for determining whether a specified
|
|
1259 * Unicode character is a whitespace character according to production 3
|
|
1260 * of the XML 1.0 specification.
|
|
1261 *
|
|
1262 * @param c <code>char</code> to check for XML whitespace compliance
|
|
1263 * @return <code>boolean</code> true if it's a whitespace, false otherwise
|
|
1264 */
|
|
1265 public static boolean isXMLWhitespace(char c) {
|
|
1266 if (c==' ' || c=='\n' || c=='\t' || c=='\r' ){
|
|
1267 return true;
|
|
1268 }
|
|
1269 return false;
|
|
1270 }
|
|
1271 }
|