Mercurial > repos > pfrommolt > ngsrich
comparison NGSrich_0.5.5/src/org/jdom/Verifier.java @ 0:89ad0a9cca52 default tip
Uploaded
author | pfrommolt |
---|---|
date | Mon, 21 Nov 2011 08:12:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:89ad0a9cca52 |
---|---|
1 /*-- | |
2 | |
3 $Id: Verifier.java,v 1.57 2009/07/23 05:54:23 jhunter Exp $ | |
4 | |
5 Copyright (C) 2000-2007 Jason Hunter & Brett McLaughlin. | |
6 All rights reserved. | |
7 | |
8 Redistribution and use in source and binary forms, with or without | |
9 modification, are permitted provided that the following conditions | |
10 are met: | |
11 | |
12 1. Redistributions of source code must retain the above copyright | |
13 notice, this list of conditions, and the following disclaimer. | |
14 | |
15 2. Redistributions in binary form must reproduce the above copyright | |
16 notice, this list of conditions, and the disclaimer that follows | |
17 these conditions in the documentation and/or other materials | |
18 provided with the distribution. | |
19 | |
20 3. The name "JDOM" must not be used to endorse or promote products | |
21 derived from this software without prior written permission. For | |
22 written permission, please contact <request_AT_jdom_DOT_org>. | |
23 | |
24 4. Products derived from this software may not be called "JDOM", nor | |
25 may "JDOM" appear in their name, without prior written permission | |
26 from the JDOM Project Management <request_AT_jdom_DOT_org>. | |
27 | |
28 In addition, we request (but do not require) that you include in the | |
29 end-user documentation provided with the redistribution and/or in the | |
30 software itself an acknowledgement equivalent to the following: | |
31 "This product includes software developed by the | |
32 JDOM Project (http://www.jdom.org/)." | |
33 Alternatively, the acknowledgment may be graphical using the logos | |
34 available at http://www.jdom.org/images/logos. | |
35 | |
36 THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED | |
37 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |
38 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
39 DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT | |
40 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
41 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
42 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF | |
43 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | |
44 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
45 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT | |
46 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
47 SUCH DAMAGE. | |
48 | |
49 This software consists of voluntary contributions made by many | |
50 individuals on behalf of the JDOM Project and was originally | |
51 created by Jason Hunter <jhunter_AT_jdom_DOT_org> and | |
52 Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information | |
53 on the JDOM Project, please see <http://www.jdom.org/>. | |
54 | |
55 */ | |
56 | |
57 package org.jdom; | |
58 | |
59 import java.util.*; | |
60 | |
61 /** | |
62 * A utility class to handle well-formedness checks on names, data, and other | |
63 * verification tasks for JDOM. The class is final and may not be subclassed. | |
64 * | |
65 * @version $Revision: 1.57 $, $Date: 2009/07/23 05:54:23 $ | |
66 * @author Brett McLaughlin | |
67 * @author Elliotte Rusty Harold | |
68 * @author Jason Hunter | |
69 * @author Bradley S. Huffman | |
70 */ | |
71 final public class Verifier { | |
72 | |
/** CVS keyword stamp recording the revision this file was generated from. */
private static final String CVS_ID =
    "@(#) $RCSfile: Verifier.java,v $ $Revision: 1.57 $ $Date: 2009/07/23 05:54:23 $ $Name: jdom_1_1_1 $";

/**
 * Private constructor; this class only offers static helpers, so
 * instantiation is prevented.
 */
private Verifier() {
    // intentionally empty
}
80 | |
81 /** | |
82 * This will check the supplied name to see if it is legal for use as | |
83 * a JDOM <code>{@link Element}</code> name. | |
84 * | |
85 * @param name <code>String</code> name to check. | |
86 * @return <code>String</code> reason name is illegal, or | |
87 * <code>null</code> if name is OK. | |
88 */ | |
89 public static String checkElementName(String name) { | |
90 // Check basic XML name rules first | |
91 String reason; | |
92 if ((reason = checkXMLName(name)) != null) { | |
93 return reason; | |
94 } | |
95 | |
96 // No colons allowed, since elements handle this internally | |
97 if (name.indexOf(":") != -1) { | |
98 return "Element names cannot contain colons"; | |
99 } | |
100 | |
101 // If we got here, everything is OK | |
102 return null; | |
103 } | |
104 | |
105 /** | |
106 * This will check the supplied name to see if it is legal for use as | |
107 * a JDOM <code>{@link Attribute}</code> name. | |
108 * | |
109 * @param name <code>String</code> name to check. | |
110 * @return <code>String</code> reason name is illegal, or | |
111 * <code>null</code> if name is OK. | |
112 */ | |
113 public static String checkAttributeName(String name) { | |
114 // Check basic XML name rules first | |
115 String reason; | |
116 if ((reason = checkXMLName(name)) != null) { | |
117 return reason; | |
118 } | |
119 | |
120 // No colons are allowed, since attributes handle this internally | |
121 if (name.indexOf(":") != -1) { | |
122 return "Attribute names cannot contain colons"; | |
123 } | |
124 | |
125 // Attribute names may not be xmlns since we do this internally too | |
126 if (name.equals("xmlns")) { | |
127 return "An Attribute name may not be \"xmlns\"; " + | |
128 "use the Namespace class to manage namespaces"; | |
129 } | |
130 | |
131 // If we got here, everything is OK | |
132 return null; | |
133 } | |
134 | |
135 /** | |
136 * This will check the supplied string to see if it only contains | |
137 * characters allowed by the XML 1.0 specification. The C0 controls | |
138 * (e.g. null, vertical tab, formfeed, etc.) are specifically excluded | |
139 * except for carriage return, linefeed, and the horizontal tab. | |
140 * Surrogates are also excluded. | |
141 * <p> | |
142 * This method is useful for checking element content and attribute | |
143 * values. Note that characters | |
144 * like " and < are allowed in attribute values and element content. | |
145 * They will simply be escaped as " or < | |
146 * when the value is serialized. | |
147 * </p> | |
148 * | |
149 * @param text <code>String</code> value to check. | |
150 * @return <code>String</code> reason name is illegal, or | |
151 * <code>null</code> if name is OK. | |
152 */ | |
153 public static String checkCharacterData(String text) { | |
154 if (text == null) { | |
155 return "A null is not a legal XML value"; | |
156 } | |
157 | |
158 // Do check | |
159 for (int i = 0, len = text.length(); i<len; i++) { | |
160 | |
161 int ch = text.charAt(i); | |
162 | |
163 // Check if high part of a surrogate pair | |
164 if (isHighSurrogate((char) ch)) { | |
165 // Check if next char is the low-surrogate | |
166 i++; | |
167 if (i < len) { | |
168 char low = text.charAt(i); | |
169 if (!isLowSurrogate(low)) { | |
170 return "Illegal Surrogate Pair"; | |
171 } | |
172 // It's a good pair, calculate the true value of | |
173 // the character to then fall thru to isXMLCharacter | |
174 ch = decodeSurrogatePair((char) ch, low); | |
175 } | |
176 else { | |
177 return "Surrogate Pair Truncated"; | |
178 } | |
179 } | |
180 | |
181 if (!isXMLCharacter(ch)) { | |
182 // Likely this character can't be easily displayed | |
183 // because it's a control so we use it'd hexadecimal | |
184 // representation in the reason. | |
185 return ("0x" + Integer.toHexString(ch) + | |
186 " is not a legal XML character"); | |
187 } | |
188 } | |
189 | |
190 // If we got here, everything is OK | |
191 return null; | |
192 } | |
193 | |
194 /** | |
195 * This will check the supplied data to see if it is legal for use as | |
196 * JDOM <code>{@link CDATA}</code>. | |
197 * | |
198 * @param data <code>String</code> data to check. | |
199 * @return <code>String</code> reason data is illegal, or | |
200 * <code>null</code> is name is OK. | |
201 */ | |
202 public static String checkCDATASection(String data) { | |
203 String reason = null; | |
204 if ((reason = checkCharacterData(data)) != null) { | |
205 return reason; | |
206 } | |
207 | |
208 if (data.indexOf("]]>") != -1) { | |
209 return "CDATA cannot internally contain a CDATA ending " + | |
210 "delimiter (]]>)"; | |
211 } | |
212 | |
213 // If we got here, everything is OK | |
214 return null; | |
215 } | |
216 | |
217 /** | |
218 * This will check the supplied name to see if it is legal for use as | |
219 * a JDOM <code>{@link Namespace}</code> prefix. | |
220 * | |
221 * @param prefix <code>String</code> prefix to check. | |
222 * @return <code>String</code> reason name is illegal, or | |
223 * <code>null</code> if name is OK. | |
224 */ | |
225 public static String checkNamespacePrefix(String prefix) { | |
226 // Manually do rules, since URIs can be null or empty | |
227 if ((prefix == null) || (prefix.equals(""))) { | |
228 return null; | |
229 } | |
230 | |
231 // Cannot start with a number | |
232 char first = prefix.charAt(0); | |
233 if (isXMLDigit(first)) { | |
234 return "Namespace prefixes cannot begin with a number"; | |
235 } | |
236 // Cannot start with a $ | |
237 if (first == '$') { | |
238 return "Namespace prefixes cannot begin with a dollar sign ($)"; | |
239 } | |
240 // Cannot start with a - | |
241 if (first == '-') { | |
242 return "Namespace prefixes cannot begin with a hyphen (-)"; | |
243 } | |
244 // Cannot start with a . | |
245 if (first == '.') { | |
246 return "Namespace prefixes cannot begin with a period (.)"; | |
247 } | |
248 // Cannot start with "xml" in any character case | |
249 if (prefix.toLowerCase().startsWith("xml")) { | |
250 return "Namespace prefixes cannot begin with " + | |
251 "\"xml\" in any combination of case"; | |
252 } | |
253 | |
254 // Ensure legal content | |
255 for (int i=0, len = prefix.length(); i<len; i++) { | |
256 char c = prefix.charAt(i); | |
257 if (!isXMLNameCharacter(c)) { | |
258 return "Namespace prefixes cannot contain the character \"" + | |
259 c + "\""; | |
260 } | |
261 } | |
262 | |
263 // No colons allowed | |
264 if (prefix.indexOf(":") != -1) { | |
265 return "Namespace prefixes cannot contain colons"; | |
266 } | |
267 | |
268 // If we got here, everything is OK | |
269 return null; | |
270 } | |
271 | |
272 /** | |
273 * This will check the supplied name to see if it is legal for use as | |
274 * a JDOM <code>{@link Namespace}</code> URI. | |
275 * | |
276 * @param uri <code>String</code> URI to check. | |
277 * @return <code>String</code> reason name is illegal, or | |
278 * <code>null</code> if name is OK. | |
279 */ | |
280 public static String checkNamespaceURI(String uri) { | |
281 // Manually do rules, since URIs can be null or empty | |
282 if ((uri == null) || (uri.equals(""))) { | |
283 return null; | |
284 } | |
285 | |
286 // Cannot start with a number | |
287 char first = uri.charAt(0); | |
288 if (Character.isDigit(first)) { | |
289 return "Namespace URIs cannot begin with a number"; | |
290 } | |
291 // Cannot start with a $ | |
292 if (first == '$') { | |
293 return "Namespace URIs cannot begin with a dollar sign ($)"; | |
294 } | |
295 // Cannot start with a - | |
296 if (first == '-') { | |
297 return "Namespace URIs cannot begin with a hyphen (-)"; | |
298 } | |
299 | |
300 // If we got here, everything is OK | |
301 return null; | |
302 } | |
303 | |
304 /** | |
305 * Check if two namespaces collide. | |
306 * | |
307 * @param namespace <code>Namespace</code> to check. | |
308 * @param other <code>Namespace</code> to check against. | |
309 * @return <code>String</code> reason for collision, or | |
310 * <code>null</code> if no collision. | |
311 */ | |
312 public static String checkNamespaceCollision(Namespace namespace, | |
313 Namespace other) { | |
314 String p1,p2,u1,u2,reason; | |
315 | |
316 reason = null; | |
317 p1 = namespace.getPrefix(); | |
318 u1 = namespace.getURI(); | |
319 p2 = other.getPrefix(); | |
320 u2 = other.getURI(); | |
321 if (p1.equals(p2) && !u1.equals(u2)) { | |
322 reason = "The namespace prefix \"" + p1 + "\" collides"; | |
323 } | |
324 return reason; | |
325 } | |
326 | |
327 /** | |
328 * Check if <code>{@link Attribute}</code>'s namespace collides with a | |
329 * <code>{@link Element}</code>'s namespace. | |
330 * | |
331 * @param attribute <code>Attribute</code> to check. | |
332 * @param element <code>Element</code> to check against. | |
333 * @return <code>String</code> reason for collision, or | |
334 * <code>null</code> if no collision. | |
335 */ | |
336 public static String checkNamespaceCollision(Attribute attribute, | |
337 Element element) { | |
338 Namespace namespace = attribute.getNamespace(); | |
339 String prefix = namespace.getPrefix(); | |
340 if ("".equals(prefix)) { | |
341 return null; | |
342 } | |
343 | |
344 return checkNamespaceCollision(namespace, element); | |
345 } | |
346 | |
347 /** | |
348 * Check if a <code>{@link Namespace}</code> collides with a | |
349 * <code>{@link Element}</code>'s namespace. | |
350 * | |
351 * @param namespace <code>Namespace</code> to check. | |
352 * @param element <code>Element</code> to check against. | |
353 * @return <code>String</code> reason for collision, or | |
354 * <code>null</code> if no collision. | |
355 */ | |
356 public static String checkNamespaceCollision(Namespace namespace, | |
357 Element element) { | |
358 String reason = checkNamespaceCollision(namespace, | |
359 element.getNamespace()); | |
360 if (reason != null) { | |
361 return reason + " with the element namespace prefix"; | |
362 } | |
363 | |
364 reason = checkNamespaceCollision(namespace, | |
365 element.getAdditionalNamespaces()); | |
366 if (reason != null) { | |
367 return reason; | |
368 } | |
369 | |
370 reason = checkNamespaceCollision(namespace, element.getAttributes()); | |
371 if (reason != null) { | |
372 return reason; | |
373 } | |
374 | |
375 return null; | |
376 } | |
377 | |
378 /** | |
379 * Check if a <code>{@link Namespace}</code> collides with a | |
380 * <code>{@link Attribute}</code>'s namespace. | |
381 * | |
382 * @param namespace <code>Namespace</code> to check. | |
383 * @param attribute <code>Attribute</code> to check against. | |
384 * @return <code>String</code> reason for collision, or | |
385 * <code>null</code> if no collision. | |
386 */ | |
387 public static String checkNamespaceCollision(Namespace namespace, | |
388 Attribute attribute) { | |
389 String reason = null; | |
390 if (!attribute.getNamespace().equals(Namespace.NO_NAMESPACE)) { | |
391 reason = checkNamespaceCollision(namespace, | |
392 attribute.getNamespace()); | |
393 if (reason != null) { | |
394 reason += " with an attribute namespace prefix on the element"; | |
395 } | |
396 } | |
397 return reason; | |
398 } | |
399 | |
400 /** | |
401 * Check if a <code>{@link Namespace}</code> collides with any namespace | |
402 * from a list of objects. | |
403 * | |
404 * @param namespace <code>Namespace</code> to check. | |
405 * @param list <code>List</code> to check against. | |
406 * @return <code>String</code> reason for collision, or | |
407 * <code>null</code> if no collision. | |
408 */ | |
409 public static String checkNamespaceCollision(Namespace namespace, | |
410 List list) { | |
411 if (list == null) { | |
412 return null; | |
413 } | |
414 | |
415 String reason = null; | |
416 Iterator i = list.iterator(); | |
417 while ((reason == null) && i.hasNext()) { | |
418 Object obj = i.next(); | |
419 if (obj instanceof Attribute) { | |
420 reason = checkNamespaceCollision(namespace, (Attribute) obj); | |
421 } | |
422 else if (obj instanceof Element) { | |
423 reason = checkNamespaceCollision(namespace, (Element) obj); | |
424 } | |
425 else if (obj instanceof Namespace) { | |
426 reason = checkNamespaceCollision(namespace, (Namespace) obj); | |
427 if (reason != null) { | |
428 reason += " with an additional namespace declared" + | |
429 " by the element"; | |
430 } | |
431 } | |
432 } | |
433 return reason; | |
434 } | |
435 | |
436 /** | |
437 * This will check the supplied data to see if it is legal for use as | |
438 * a JDOM <code>{@link ProcessingInstruction}</code> target. | |
439 * | |
440 * @param target <code>String</code> target to check. | |
441 * @return <code>String</code> reason target is illegal, or | |
442 * <code>null</code> if target is OK. | |
443 */ | |
444 public static String checkProcessingInstructionTarget(String target) { | |
445 // Check basic XML name rules first | |
446 String reason; | |
447 if ((reason = checkXMLName(target)) != null) { | |
448 return reason; | |
449 } | |
450 | |
451 // No colons allowed, per Namespace Specification Section 6 | |
452 if (target.indexOf(":") != -1) { | |
453 return "Processing instruction targets cannot contain colons"; | |
454 } | |
455 | |
456 // Cannot begin with 'xml' in any case | |
457 if (target.equalsIgnoreCase("xml")) { | |
458 return "Processing instructions cannot have a target of " + | |
459 "\"xml\" in any combination of case. (Note that the " + | |
460 "\"<?xml ... ?>\" declaration at the beginning of a " + | |
461 "document is not a processing instruction and should not " + | |
462 "be added as one; it is written automatically during " + | |
463 "output, e.g. by XMLOutputter.)"; | |
464 } | |
465 | |
466 // If we got here, everything is OK | |
467 return null; | |
468 } | |
469 | |
470 /** | |
471 * This will check the supplied data to see if it is legal for use as | |
472 * <code>{@link ProcessingInstruction}</code> data. Besides checking that | |
473 * all the characters are allowed in XML, this also checks | |
474 * that the data does not contain the PI end-string "?>". | |
475 * | |
476 * @param data <code>String</code> data to check. | |
477 * @return <code>String</code> reason data is illegal, or | |
478 * <code>null</code> if data is OK. | |
479 */ | |
480 public static String checkProcessingInstructionData(String data) { | |
481 // Check basic XML name rules first | |
482 String reason = checkCharacterData(data); | |
483 | |
484 if (reason == null) { | |
485 if (data.indexOf("?>") >= 0) { | |
486 return "Processing instructions cannot contain " + | |
487 "the string \"?>\""; | |
488 } | |
489 } | |
490 | |
491 return reason; | |
492 } | |
493 | |
494 /** | |
495 * This will check the supplied data to see if it is legal for use as | |
496 * JDOM <code>{@link Comment}</code> data. | |
497 * | |
498 * @param data <code>String</code> data to check. | |
499 * @return <code>String</code> reason data is illegal, or | |
500 * <code>null</code> if data is OK. | |
501 */ | |
502 public static String checkCommentData(String data) { | |
503 String reason = null; | |
504 if ((reason = checkCharacterData(data)) != null) { | |
505 return reason; | |
506 } | |
507 | |
508 if (data.indexOf("--") != -1) { | |
509 return "Comments cannot contain double hyphens (--)"; | |
510 } | |
511 if (data.endsWith("-")) { | |
512 return "Comment data cannot end with a hyphen."; | |
513 } | |
514 | |
515 // If we got here, everything is OK | |
516 return null; | |
517 } | |
518 /** | |
519 * This is a utility function to decode a non-BMP | |
520 * UTF-16 surrogate pair. | |
521 * @param high high 16 bits | |
522 * @param low low 16 bits | |
523 * @return decoded character | |
524 */ | |
525 public static int decodeSurrogatePair(char high, char low) { | |
526 return 0x10000 + (high - 0xD800) * 0x400 + (low - 0xDC00); | |
527 } | |
528 | |
529 // [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | | |
530 // [-'()+,./:=?;*#@$_%] | |
531 public static boolean isXMLPublicIDCharacter(char c) { | |
532 | |
533 if (c >= 'a' && c <= 'z') return true; | |
534 if (c >= '?' && c <= 'Z') return true; | |
535 if (c >= '\'' && c <= ';') return true; | |
536 | |
537 if (c == ' ') return true; | |
538 if (c == '!') return true; | |
539 if (c == '=') return true; | |
540 if (c == '#') return true; | |
541 if (c == '$') return true; | |
542 if (c == '_') return true; | |
543 if (c == '%') return true; | |
544 if (c == '\n') return true; | |
545 if (c == '\r') return true; | |
546 if (c == '\t') return true; | |
547 | |
548 return false; | |
549 } | |
550 | |
551 /** | |
552 * This will ensure that the data for a public identifier | |
553 * is legal. | |
554 * | |
555 * @param publicID <code>String</code> public ID to check. | |
556 * @return <code>String</code> reason public ID is illegal, or | |
557 * <code>null</code> if public ID is OK. | |
558 */ | |
559 public static String checkPublicID(String publicID) { | |
560 String reason = null; | |
561 | |
562 if (publicID == null) return null; | |
563 // This indicates there is no public ID | |
564 | |
565 for (int i = 0; i < publicID.length(); i++) { | |
566 char c = publicID.charAt(i); | |
567 if (!isXMLPublicIDCharacter(c)) { | |
568 reason = c + " is not a legal character in public IDs"; | |
569 break; | |
570 } | |
571 } | |
572 | |
573 return reason; | |
574 } | |
575 | |
576 | |
577 /** | |
578 * This will ensure that the data for a system literal | |
579 * is legal. | |
580 * | |
581 * @param systemLiteral <code>String</code> system literal to check. | |
582 * @return <code>String</code> reason system literal is illegal, or | |
583 * <code>null</code> if system literal is OK. | |
584 */ | |
585 public static String checkSystemLiteral(String systemLiteral) { | |
586 String reason = null; | |
587 | |
588 if (systemLiteral == null) return null; | |
589 // This indicates there is no system ID | |
590 | |
591 if (systemLiteral.indexOf('\'') != -1 | |
592 && systemLiteral.indexOf('"') != -1) { | |
593 reason = | |
594 "System literals cannot simultaneously contain both single and double quotes."; | |
595 } | |
596 else { | |
597 reason = checkCharacterData(systemLiteral); | |
598 } | |
599 | |
600 return reason; | |
601 } | |
602 | |
603 /** | |
604 * This is a utility function for sharing the base process of checking | |
605 * any XML name. | |
606 * | |
607 * @param name <code>String</code> to check for XML name compliance. | |
608 * @return <code>String</code> reason the name is illegal, or | |
609 * <code>null</code> if OK. | |
610 */ | |
611 public static String checkXMLName(String name) { | |
612 // Cannot be empty or null | |
613 if ((name == null) || (name.length() == 0) | |
614 || (name.trim().equals(""))) { | |
615 return "XML names cannot be null or empty"; | |
616 } | |
617 | |
618 | |
619 // Cannot start with a number | |
620 char first = name.charAt(0); | |
621 if (!isXMLNameStartCharacter(first)) { | |
622 return "XML names cannot begin with the character \"" + | |
623 first + "\""; | |
624 } | |
625 // Ensure legal content for non-first chars | |
626 for (int i=1, len = name.length(); i<len; i++) { | |
627 char c = name.charAt(i); | |
628 if (!isXMLNameCharacter(c)) { | |
629 return "XML names cannot contain the character \"" + c + "\""; | |
630 } | |
631 } | |
632 | |
633 // We got here, so everything is OK | |
634 return null; | |
635 } | |
636 | |
637 /** | |
638 * <p> | |
639 * Checks a string to see if it is a legal RFC 2396 URI. | |
640 * Both absolute and relative URIs are supported. | |
641 * </p> | |
642 * | |
643 * @param uri <code>String</code> to check. | |
644 * @return <code>String</code> reason the URI is illegal, or | |
645 * <code>null</code> if OK. | |
646 */ | |
647 public static String checkURI(String uri) { | |
648 // URIs can be null or empty | |
649 if ((uri == null) || (uri.equals(""))) { | |
650 return null; | |
651 } | |
652 | |
653 for (int i = 0; i < uri.length(); i++) { | |
654 char test = uri.charAt(i); | |
655 if (!isURICharacter(test)) { | |
656 String msgNumber = "0x" + Integer.toHexString(test); | |
657 if (test <= 0x09) msgNumber = "0x0" + Integer.toHexString(test); | |
658 return "URIs cannot contain " + msgNumber; | |
659 } // end if | |
660 if (test == '%') { // must be followed by two hexadecimal digits | |
661 try { | |
662 char firstDigit = uri.charAt(i+1); | |
663 char secondDigit = uri.charAt(i+2); | |
664 if (!isHexDigit(firstDigit) || | |
665 !isHexDigit(secondDigit)) { | |
666 return "Percent signs in URIs must be followed by " | |
667 + "exactly two hexadecimal digits."; | |
668 } | |
669 | |
670 } | |
671 catch (StringIndexOutOfBoundsException e) { | |
672 return "Percent signs in URIs must be followed by " | |
673 + "exactly two hexadecimal digits."; | |
674 } | |
675 } | |
676 } // end for | |
677 | |
678 // If we got here, everything is OK | |
679 return null; | |
680 } | |
681 | |
682 /** | |
683 * <p> | |
684 * This is a utility function for determining whether a specified | |
685 * Unicode character is a hexadecimal digit as defined in RFC 2396; | |
686 * that is, one of the ASCII characters 0-9, a-f, or A-F. | |
687 * </p> | |
688 * | |
689 * @param c to check for hex digit. | |
690 * @return true if it's allowed, false otherwise. | |
691 */ | |
692 public static boolean isHexDigit(char c) { | |
693 | |
694 // I suspect most characters passed to this method will be | |
695 // correct hexadecimal digits, so I test for the true cases | |
696 // first. If this proves to be a performance bottleneck | |
697 // a switch statement or lookup table | |
698 // might optimize this. | |
699 if (c >= '0' && c <= '9') return true; | |
700 if (c >= 'A' && c <= 'F') return true; | |
701 if (c >= 'a' && c <= 'f') return true; | |
702 | |
703 return false; | |
704 } | |
705 | |
706 /** | |
707 * This is a function for determining whether the | |
708 * specified character is the high 16 bits in a | |
709 * UTF-16 surrogate pair. | |
710 * @param ch character to check | |
711 * @return true if the character is a high surrogate, false otherwise | |
712 */ | |
713 public static boolean isHighSurrogate(char ch) { | |
714 return (ch >= 0xD800 && ch <= 0xDBFF); | |
715 } | |
716 | |
717 /** | |
718 * This is a function for determining whether the | |
719 * specified character is the low 16 bits in a | |
720 * UTF-16 surrogate pair. | |
721 * @param ch character to check | |
722 * @return true if the character is a low surrogate, false otherwise. | |
723 */ | |
724 public static boolean isLowSurrogate(char ch) { | |
725 return (ch >= 0xDC00 && ch <= 0xDFFF); | |
726 } | |
727 | |
728 /** | |
729 * <p> | |
730 * This is a utility function for determining whether | |
731 * a specified Unicode character is legal in URI references | |
732 * as determined by RFC 2396. | |
733 * </p> | |
734 * | |
735 * @param c <code>char</code> to check for URI reference compliance. | |
736 * @return true if it's allowed, false otherwise. | |
737 */ | |
738 public static boolean isURICharacter(char c) { | |
739 if (c >= 'a' && c <= 'z') return true; | |
740 if (c >= 'A' && c <= 'Z') return true; | |
741 if (c >= '0' && c <= '9') return true; | |
742 if (c == '/') return true; | |
743 if (c == '-') return true; | |
744 if (c == '.') return true; | |
745 if (c == '?') return true; | |
746 if (c == ':') return true; | |
747 if (c == '@') return true; | |
748 if (c == '&') return true; | |
749 if (c == '=') return true; | |
750 if (c == '+') return true; | |
751 if (c == '$') return true; | |
752 if (c == ',') return true; | |
753 if (c == '%') return true; | |
754 | |
755 if (c == '_') return true; | |
756 if (c == '!') return true; | |
757 if (c == '~') return true; | |
758 if (c == '*') return true; | |
759 if (c == '\'') return true; | |
760 if (c == '(') return true; | |
761 if (c == ')') return true; | |
762 return false; | |
763 } | |
764 | |
765 /** | |
766 * This is a utility function for determining whether a specified | |
767 * character is a character according to production 2 of the | |
768 * XML 1.0 specification. | |
769 * | |
770 * @param c <code>char</code> to check for XML compliance | |
771 * @return <code>boolean</code> true if it's a character, | |
772 * false otherwise | |
773 */ | |
774 public static boolean isXMLCharacter(int c) { | |
775 | |
776 if (c == '\n') return true; | |
777 if (c == '\r') return true; | |
778 if (c == '\t') return true; | |
779 | |
780 if (c < 0x20) return false; if (c <= 0xD7FF) return true; | |
781 if (c < 0xE000) return false; if (c <= 0xFFFD) return true; | |
782 if (c < 0x10000) return false; if (c <= 0x10FFFF) return true; | |
783 | |
784 return false; | |
785 } | |
786 | |
787 | |
788 /** | |
789 * This is a utility function for determining whether a specified | |
790 * character is a name character according to production 4 of the | |
791 * XML 1.0 specification. | |
792 * | |
793 * @param c <code>char</code> to check for XML name compliance. | |
794 * @return <code>boolean</code> true if it's a name character, | |
795 * false otherwise. | |
796 */ | |
797 public static boolean isXMLNameCharacter(char c) { | |
798 | |
799 return (isXMLLetter(c) || isXMLDigit(c) || c == '.' || c == '-' | |
800 || c == '_' || c == ':' || isXMLCombiningChar(c) | |
801 || isXMLExtender(c)); | |
802 } | |
803 | |
804 /** | |
805 * This is a utility function for determining whether a specified | |
806 * character is a legal name start character according to production 5 | |
807 * of the XML 1.0 specification. This production does allow names | |
808 * to begin with colons which the Namespaces in XML Recommendation | |
809 * disallows. | |
810 * | |
811 * @param c <code>char</code> to check for XML name start compliance. | |
812 * @return <code>boolean</code> true if it's a name start character, | |
813 * false otherwise. | |
814 */ | |
815 public static boolean isXMLNameStartCharacter(char c) { | |
816 | |
817 return (isXMLLetter(c) || c == '_' || c ==':'); | |
818 | |
819 } | |
820 | |
821 /** | |
822 * This is a utility function for determining whether a specified | |
823 * character is a letter or digit according to productions 84 and 88 | |
824 * of the XML 1.0 specification. | |
825 * | |
826 * @param c <code>char</code> to check. | |
827 * @return <code>boolean</code> true if it's letter or digit, | |
828 * false otherwise. | |
829 */ | |
830 public static boolean isXMLLetterOrDigit(char c) { | |
831 | |
832 return (isXMLLetter(c) || isXMLDigit(c)); | |
833 | |
834 } | |
835 | |
836 /** | |
837 * This is a utility function for determining whether a specified character | |
838 * is a letter according to production 84 of the XML 1.0 specification. | |
839 * | |
840 * @param c <code>char</code> to check for XML name compliance. | |
841 * @return <code>String</code> true if it's a letter, false otherwise. | |
842 */ | |
843 public static boolean isXMLLetter(char c) { | |
844 // Note that order is very important here. The search proceeds | |
845 // from lowest to highest values, so that no searching occurs | |
846 // above the character's value. BTW, the first line is equivalent to: | |
847 // if (c >= 0x0041 && c <= 0x005A) return true; | |
848 | |
849 if (c < 0x0041) return false; if (c <= 0x005a) return true; | |
850 if (c < 0x0061) return false; if (c <= 0x007A) return true; | |
851 if (c < 0x00C0) return false; if (c <= 0x00D6) return true; | |
852 if (c < 0x00D8) return false; if (c <= 0x00F6) return true; | |
853 if (c < 0x00F8) return false; if (c <= 0x00FF) return true; | |
854 if (c < 0x0100) return false; if (c <= 0x0131) return true; | |
855 if (c < 0x0134) return false; if (c <= 0x013E) return true; | |
856 if (c < 0x0141) return false; if (c <= 0x0148) return true; | |
857 if (c < 0x014A) return false; if (c <= 0x017E) return true; | |
858 if (c < 0x0180) return false; if (c <= 0x01C3) return true; | |
859 if (c < 0x01CD) return false; if (c <= 0x01F0) return true; | |
860 if (c < 0x01F4) return false; if (c <= 0x01F5) return true; | |
861 if (c < 0x01FA) return false; if (c <= 0x0217) return true; | |
862 if (c < 0x0250) return false; if (c <= 0x02A8) return true; | |
863 if (c < 0x02BB) return false; if (c <= 0x02C1) return true; | |
864 if (c == 0x0386) return true; | |
865 if (c < 0x0388) return false; if (c <= 0x038A) return true; | |
866 if (c == 0x038C) return true; | |
867 if (c < 0x038E) return false; if (c <= 0x03A1) return true; | |
868 if (c < 0x03A3) return false; if (c <= 0x03CE) return true; | |
869 if (c < 0x03D0) return false; if (c <= 0x03D6) return true; | |
870 if (c == 0x03DA) return true; | |
871 if (c == 0x03DC) return true; | |
872 if (c == 0x03DE) return true; | |
873 if (c == 0x03E0) return true; | |
874 if (c < 0x03E2) return false; if (c <= 0x03F3) return true; | |
875 if (c < 0x0401) return false; if (c <= 0x040C) return true; | |
876 if (c < 0x040E) return false; if (c <= 0x044F) return true; | |
877 if (c < 0x0451) return false; if (c <= 0x045C) return true; | |
878 if (c < 0x045E) return false; if (c <= 0x0481) return true; | |
879 if (c < 0x0490) return false; if (c <= 0x04C4) return true; | |
880 if (c < 0x04C7) return false; if (c <= 0x04C8) return true; | |
881 if (c < 0x04CB) return false; if (c <= 0x04CC) return true; | |
882 if (c < 0x04D0) return false; if (c <= 0x04EB) return true; | |
883 if (c < 0x04EE) return false; if (c <= 0x04F5) return true; | |
884 if (c < 0x04F8) return false; if (c <= 0x04F9) return true; | |
885 if (c < 0x0531) return false; if (c <= 0x0556) return true; | |
886 if (c == 0x0559) return true; | |
887 if (c < 0x0561) return false; if (c <= 0x0586) return true; | |
888 if (c < 0x05D0) return false; if (c <= 0x05EA) return true; | |
889 if (c < 0x05F0) return false; if (c <= 0x05F2) return true; | |
890 if (c < 0x0621) return false; if (c <= 0x063A) return true; | |
891 if (c < 0x0641) return false; if (c <= 0x064A) return true; | |
892 if (c < 0x0671) return false; if (c <= 0x06B7) return true; | |
893 if (c < 0x06BA) return false; if (c <= 0x06BE) return true; | |
894 if (c < 0x06C0) return false; if (c <= 0x06CE) return true; | |
895 if (c < 0x06D0) return false; if (c <= 0x06D3) return true; | |
896 if (c == 0x06D5) return true; | |
897 if (c < 0x06E5) return false; if (c <= 0x06E6) return true; | |
898 if (c < 0x0905) return false; if (c <= 0x0939) return true; | |
899 if (c == 0x093D) return true; | |
900 if (c < 0x0958) return false; if (c <= 0x0961) return true; | |
901 if (c < 0x0985) return false; if (c <= 0x098C) return true; | |
902 if (c < 0x098F) return false; if (c <= 0x0990) return true; | |
903 if (c < 0x0993) return false; if (c <= 0x09A8) return true; | |
904 if (c < 0x09AA) return false; if (c <= 0x09B0) return true; | |
905 if (c == 0x09B2) return true; | |
906 if (c < 0x09B6) return false; if (c <= 0x09B9) return true; | |
907 if (c < 0x09DC) return false; if (c <= 0x09DD) return true; | |
908 if (c < 0x09DF) return false; if (c <= 0x09E1) return true; | |
909 if (c < 0x09F0) return false; if (c <= 0x09F1) return true; | |
910 if (c < 0x0A05) return false; if (c <= 0x0A0A) return true; | |
911 if (c < 0x0A0F) return false; if (c <= 0x0A10) return true; | |
912 if (c < 0x0A13) return false; if (c <= 0x0A28) return true; | |
913 if (c < 0x0A2A) return false; if (c <= 0x0A30) return true; | |
914 if (c < 0x0A32) return false; if (c <= 0x0A33) return true; | |
915 if (c < 0x0A35) return false; if (c <= 0x0A36) return true; | |
916 if (c < 0x0A38) return false; if (c <= 0x0A39) return true; | |
917 if (c < 0x0A59) return false; if (c <= 0x0A5C) return true; | |
918 if (c == 0x0A5E) return true; | |
919 if (c < 0x0A72) return false; if (c <= 0x0A74) return true; | |
920 if (c < 0x0A85) return false; if (c <= 0x0A8B) return true; | |
921 if (c == 0x0A8D) return true; | |
922 if (c < 0x0A8F) return false; if (c <= 0x0A91) return true; | |
923 if (c < 0x0A93) return false; if (c <= 0x0AA8) return true; | |
924 if (c < 0x0AAA) return false; if (c <= 0x0AB0) return true; | |
925 if (c < 0x0AB2) return false; if (c <= 0x0AB3) return true; | |
926 if (c < 0x0AB5) return false; if (c <= 0x0AB9) return true; | |
927 if (c == 0x0ABD) return true; | |
928 if (c == 0x0AE0) return true; | |
929 if (c < 0x0B05) return false; if (c <= 0x0B0C) return true; | |
930 if (c < 0x0B0F) return false; if (c <= 0x0B10) return true; | |
931 if (c < 0x0B13) return false; if (c <= 0x0B28) return true; | |
932 if (c < 0x0B2A) return false; if (c <= 0x0B30) return true; | |
933 if (c < 0x0B32) return false; if (c <= 0x0B33) return true; | |
934 if (c < 0x0B36) return false; if (c <= 0x0B39) return true; | |
935 if (c == 0x0B3D) return true; | |
936 if (c < 0x0B5C) return false; if (c <= 0x0B5D) return true; | |
937 if (c < 0x0B5F) return false; if (c <= 0x0B61) return true; | |
938 if (c < 0x0B85) return false; if (c <= 0x0B8A) return true; | |
939 if (c < 0x0B8E) return false; if (c <= 0x0B90) return true; | |
940 if (c < 0x0B92) return false; if (c <= 0x0B95) return true; | |
941 if (c < 0x0B99) return false; if (c <= 0x0B9A) return true; | |
942 if (c == 0x0B9C) return true; | |
943 if (c < 0x0B9E) return false; if (c <= 0x0B9F) return true; | |
944 if (c < 0x0BA3) return false; if (c <= 0x0BA4) return true; | |
945 if (c < 0x0BA8) return false; if (c <= 0x0BAA) return true; | |
946 if (c < 0x0BAE) return false; if (c <= 0x0BB5) return true; | |
947 if (c < 0x0BB7) return false; if (c <= 0x0BB9) return true; | |
948 if (c < 0x0C05) return false; if (c <= 0x0C0C) return true; | |
949 if (c < 0x0C0E) return false; if (c <= 0x0C10) return true; | |
950 if (c < 0x0C12) return false; if (c <= 0x0C28) return true; | |
951 if (c < 0x0C2A) return false; if (c <= 0x0C33) return true; | |
952 if (c < 0x0C35) return false; if (c <= 0x0C39) return true; | |
953 if (c < 0x0C60) return false; if (c <= 0x0C61) return true; | |
954 if (c < 0x0C85) return false; if (c <= 0x0C8C) return true; | |
955 if (c < 0x0C8E) return false; if (c <= 0x0C90) return true; | |
956 if (c < 0x0C92) return false; if (c <= 0x0CA8) return true; | |
957 if (c < 0x0CAA) return false; if (c <= 0x0CB3) return true; | |
958 if (c < 0x0CB5) return false; if (c <= 0x0CB9) return true; | |
959 if (c == 0x0CDE) return true; | |
960 if (c < 0x0CE0) return false; if (c <= 0x0CE1) return true; | |
961 if (c < 0x0D05) return false; if (c <= 0x0D0C) return true; | |
962 if (c < 0x0D0E) return false; if (c <= 0x0D10) return true; | |
963 if (c < 0x0D12) return false; if (c <= 0x0D28) return true; | |
964 if (c < 0x0D2A) return false; if (c <= 0x0D39) return true; | |
965 if (c < 0x0D60) return false; if (c <= 0x0D61) return true; | |
966 if (c < 0x0E01) return false; if (c <= 0x0E2E) return true; | |
967 if (c == 0x0E30) return true; | |
968 if (c < 0x0E32) return false; if (c <= 0x0E33) return true; | |
969 if (c < 0x0E40) return false; if (c <= 0x0E45) return true; | |
970 if (c < 0x0E81) return false; if (c <= 0x0E82) return true; | |
971 if (c == 0x0E84) return true; | |
972 if (c < 0x0E87) return false; if (c <= 0x0E88) return true; | |
973 if (c == 0x0E8A) return true; | |
974 if (c == 0x0E8D) return true; | |
975 if (c < 0x0E94) return false; if (c <= 0x0E97) return true; | |
976 if (c < 0x0E99) return false; if (c <= 0x0E9F) return true; | |
977 if (c < 0x0EA1) return false; if (c <= 0x0EA3) return true; | |
978 if (c == 0x0EA5) return true; | |
979 if (c == 0x0EA7) return true; | |
980 if (c < 0x0EAA) return false; if (c <= 0x0EAB) return true; | |
981 if (c < 0x0EAD) return false; if (c <= 0x0EAE) return true; | |
982 if (c == 0x0EB0) return true; | |
983 if (c < 0x0EB2) return false; if (c <= 0x0EB3) return true; | |
984 if (c == 0x0EBD) return true; | |
985 if (c < 0x0EC0) return false; if (c <= 0x0EC4) return true; | |
986 if (c < 0x0F40) return false; if (c <= 0x0F47) return true; | |
987 if (c < 0x0F49) return false; if (c <= 0x0F69) return true; | |
988 if (c < 0x10A0) return false; if (c <= 0x10C5) return true; | |
989 if (c < 0x10D0) return false; if (c <= 0x10F6) return true; | |
990 if (c == 0x1100) return true; | |
991 if (c < 0x1102) return false; if (c <= 0x1103) return true; | |
992 if (c < 0x1105) return false; if (c <= 0x1107) return true; | |
993 if (c == 0x1109) return true; | |
994 if (c < 0x110B) return false; if (c <= 0x110C) return true; | |
995 if (c < 0x110E) return false; if (c <= 0x1112) return true; | |
996 if (c == 0x113C) return true; | |
997 if (c == 0x113E) return true; | |
998 if (c == 0x1140) return true; | |
999 if (c == 0x114C) return true; | |
1000 if (c == 0x114E) return true; | |
1001 if (c == 0x1150) return true; | |
1002 if (c < 0x1154) return false; if (c <= 0x1155) return true; | |
1003 if (c == 0x1159) return true; | |
1004 if (c < 0x115F) return false; if (c <= 0x1161) return true; | |
1005 if (c == 0x1163) return true; | |
1006 if (c == 0x1165) return true; | |
1007 if (c == 0x1167) return true; | |
1008 if (c == 0x1169) return true; | |
1009 if (c < 0x116D) return false; if (c <= 0x116E) return true; | |
1010 if (c < 0x1172) return false; if (c <= 0x1173) return true; | |
1011 if (c == 0x1175) return true; | |
1012 if (c == 0x119E) return true; | |
1013 if (c == 0x11A8) return true; | |
1014 if (c == 0x11AB) return true; | |
1015 if (c < 0x11AE) return false; if (c <= 0x11AF) return true; | |
1016 if (c < 0x11B7) return false; if (c <= 0x11B8) return true; | |
1017 if (c == 0x11BA) return true; | |
1018 if (c < 0x11BC) return false; if (c <= 0x11C2) return true; | |
1019 if (c == 0x11EB) return true; | |
1020 if (c == 0x11F0) return true; | |
1021 if (c == 0x11F9) return true; | |
1022 if (c < 0x1E00) return false; if (c <= 0x1E9B) return true; | |
1023 if (c < 0x1EA0) return false; if (c <= 0x1EF9) return true; | |
1024 if (c < 0x1F00) return false; if (c <= 0x1F15) return true; | |
1025 if (c < 0x1F18) return false; if (c <= 0x1F1D) return true; | |
1026 if (c < 0x1F20) return false; if (c <= 0x1F45) return true; | |
1027 if (c < 0x1F48) return false; if (c <= 0x1F4D) return true; | |
1028 if (c < 0x1F50) return false; if (c <= 0x1F57) return true; | |
1029 if (c == 0x1F59) return true; | |
1030 if (c == 0x1F5B) return true; | |
1031 if (c == 0x1F5D) return true; | |
1032 if (c < 0x1F5F) return false; if (c <= 0x1F7D) return true; | |
1033 if (c < 0x1F80) return false; if (c <= 0x1FB4) return true; | |
1034 if (c < 0x1FB6) return false; if (c <= 0x1FBC) return true; | |
1035 if (c == 0x1FBE) return true; | |
1036 if (c < 0x1FC2) return false; if (c <= 0x1FC4) return true; | |
1037 if (c < 0x1FC6) return false; if (c <= 0x1FCC) return true; | |
1038 if (c < 0x1FD0) return false; if (c <= 0x1FD3) return true; | |
1039 if (c < 0x1FD6) return false; if (c <= 0x1FDB) return true; | |
1040 if (c < 0x1FE0) return false; if (c <= 0x1FEC) return true; | |
1041 if (c < 0x1FF2) return false; if (c <= 0x1FF4) return true; | |
1042 if (c < 0x1FF6) return false; if (c <= 0x1FFC) return true; | |
1043 if (c == 0x2126) return true; | |
1044 if (c < 0x212A) return false; if (c <= 0x212B) return true; | |
1045 if (c == 0x212E) return true; | |
1046 if (c < 0x2180) return false; if (c <= 0x2182) return true; | |
1047 if (c == 0x3007) return true; // ideographic | |
1048 if (c < 0x3021) return false; if (c <= 0x3029) return true; // ideo | |
1049 if (c < 0x3041) return false; if (c <= 0x3094) return true; | |
1050 if (c < 0x30A1) return false; if (c <= 0x30FA) return true; | |
1051 if (c < 0x3105) return false; if (c <= 0x312C) return true; | |
1052 if (c < 0x4E00) return false; if (c <= 0x9FA5) return true; // ideo | |
1053 if (c < 0xAC00) return false; if (c <= 0xD7A3) return true; | |
1054 | |
1055 return false; | |
1056 | |
1057 } | |
1058 | |
1059 /** | |
1060 * This is a utility function for determining whether a specified character | |
1061 * is a combining character according to production 87 | |
1062 * of the XML 1.0 specification. | |
1063 * | |
1064 * @param c <code>char</code> to check. | |
1065 * @return <code>boolean</code> true if it's a combining character, | |
1066 * false otherwise. | |
1067 */ | |
1068 public static boolean isXMLCombiningChar(char c) { | |
1069 // CombiningChar | |
1070 if (c < 0x0300) return false; if (c <= 0x0345) return true; | |
1071 if (c < 0x0360) return false; if (c <= 0x0361) return true; | |
1072 if (c < 0x0483) return false; if (c <= 0x0486) return true; | |
1073 if (c < 0x0591) return false; if (c <= 0x05A1) return true; | |
1074 | |
1075 if (c < 0x05A3) return false; if (c <= 0x05B9) return true; | |
1076 if (c < 0x05BB) return false; if (c <= 0x05BD) return true; | |
1077 if (c == 0x05BF) return true; | |
1078 if (c < 0x05C1) return false; if (c <= 0x05C2) return true; | |
1079 | |
1080 if (c == 0x05C4) return true; | |
1081 if (c < 0x064B) return false; if (c <= 0x0652) return true; | |
1082 if (c == 0x0670) return true; | |
1083 if (c < 0x06D6) return false; if (c <= 0x06DC) return true; | |
1084 | |
1085 if (c < 0x06DD) return false; if (c <= 0x06DF) return true; | |
1086 if (c < 0x06E0) return false; if (c <= 0x06E4) return true; | |
1087 if (c < 0x06E7) return false; if (c <= 0x06E8) return true; | |
1088 | |
1089 if (c < 0x06EA) return false; if (c <= 0x06ED) return true; | |
1090 if (c < 0x0901) return false; if (c <= 0x0903) return true; | |
1091 if (c == 0x093C) return true; | |
1092 if (c < 0x093E) return false; if (c <= 0x094C) return true; | |
1093 | |
1094 if (c == 0x094D) return true; | |
1095 if (c < 0x0951) return false; if (c <= 0x0954) return true; | |
1096 if (c < 0x0962) return false; if (c <= 0x0963) return true; | |
1097 if (c < 0x0981) return false; if (c <= 0x0983) return true; | |
1098 | |
1099 if (c == 0x09BC) return true; | |
1100 if (c == 0x09BE) return true; | |
1101 if (c == 0x09BF) return true; | |
1102 if (c < 0x09C0) return false; if (c <= 0x09C4) return true; | |
1103 if (c < 0x09C7) return false; if (c <= 0x09C8) return true; | |
1104 | |
1105 if (c < 0x09CB) return false; if (c <= 0x09CD) return true; | |
1106 if (c == 0x09D7) return true; | |
1107 if (c < 0x09E2) return false; if (c <= 0x09E3) return true; | |
1108 if (c == 0x0A02) return true; | |
1109 if (c == 0x0A3C) return true; | |
1110 | |
1111 if (c == 0x0A3E) return true; | |
1112 if (c == 0x0A3F) return true; | |
1113 if (c < 0x0A40) return false; if (c <= 0x0A42) return true; | |
1114 if (c < 0x0A47) return false; if (c <= 0x0A48) return true; | |
1115 | |
1116 if (c < 0x0A4B) return false; if (c <= 0x0A4D) return true; | |
1117 if (c < 0x0A70) return false; if (c <= 0x0A71) return true; | |
1118 if (c < 0x0A81) return false; if (c <= 0x0A83) return true; | |
1119 if (c == 0x0ABC) return true; | |
1120 | |
1121 if (c < 0x0ABE) return false; if (c <= 0x0AC5) return true; | |
1122 if (c < 0x0AC7) return false; if (c <= 0x0AC9) return true; | |
1123 if (c < 0x0ACB) return false; if (c <= 0x0ACD) return true; | |
1124 | |
1125 if (c < 0x0B01) return false; if (c <= 0x0B03) return true; | |
1126 if (c == 0x0B3C) return true; | |
1127 if (c < 0x0B3E) return false; if (c <= 0x0B43) return true; | |
1128 if (c < 0x0B47) return false; if (c <= 0x0B48) return true; | |
1129 | |
1130 if (c < 0x0B4B) return false; if (c <= 0x0B4D) return true; | |
1131 if (c < 0x0B56) return false; if (c <= 0x0B57) return true; | |
1132 if (c < 0x0B82) return false; if (c <= 0x0B83) return true; | |
1133 | |
1134 if (c < 0x0BBE) return false; if (c <= 0x0BC2) return true; | |
1135 if (c < 0x0BC6) return false; if (c <= 0x0BC8) return true; | |
1136 if (c < 0x0BCA) return false; if (c <= 0x0BCD) return true; | |
1137 if (c == 0x0BD7) return true; | |
1138 | |
1139 if (c < 0x0C01) return false; if (c <= 0x0C03) return true; | |
1140 if (c < 0x0C3E) return false; if (c <= 0x0C44) return true; | |
1141 if (c < 0x0C46) return false; if (c <= 0x0C48) return true; | |
1142 | |
1143 if (c < 0x0C4A) return false; if (c <= 0x0C4D) return true; | |
1144 if (c < 0x0C55) return false; if (c <= 0x0C56) return true; | |
1145 if (c < 0x0C82) return false; if (c <= 0x0C83) return true; | |
1146 | |
1147 if (c < 0x0CBE) return false; if (c <= 0x0CC4) return true; | |
1148 if (c < 0x0CC6) return false; if (c <= 0x0CC8) return true; | |
1149 if (c < 0x0CCA) return false; if (c <= 0x0CCD) return true; | |
1150 | |
1151 if (c < 0x0CD5) return false; if (c <= 0x0CD6) return true; | |
1152 if (c < 0x0D02) return false; if (c <= 0x0D03) return true; | |
1153 if (c < 0x0D3E) return false; if (c <= 0x0D43) return true; | |
1154 | |
1155 if (c < 0x0D46) return false; if (c <= 0x0D48) return true; | |
1156 if (c < 0x0D4A) return false; if (c <= 0x0D4D) return true; | |
1157 if (c == 0x0D57) return true; | |
1158 if (c == 0x0E31) return true; | |
1159 | |
1160 if (c < 0x0E34) return false; if (c <= 0x0E3A) return true; | |
1161 if (c < 0x0E47) return false; if (c <= 0x0E4E) return true; | |
1162 if (c == 0x0EB1) return true; | |
1163 if (c < 0x0EB4) return false; if (c <= 0x0EB9) return true; | |
1164 | |
1165 if (c < 0x0EBB) return false; if (c <= 0x0EBC) return true; | |
1166 if (c < 0x0EC8) return false; if (c <= 0x0ECD) return true; | |
1167 if (c < 0x0F18) return false; if (c <= 0x0F19) return true; | |
1168 if (c == 0x0F35) return true; | |
1169 | |
1170 if (c == 0x0F37) return true; | |
1171 if (c == 0x0F39) return true; | |
1172 if (c == 0x0F3E) return true; | |
1173 if (c == 0x0F3F) return true; | |
1174 if (c < 0x0F71) return false; if (c <= 0x0F84) return true; | |
1175 | |
1176 if (c < 0x0F86) return false; if (c <= 0x0F8B) return true; | |
1177 if (c < 0x0F90) return false; if (c <= 0x0F95) return true; | |
1178 if (c == 0x0F97) return true; | |
1179 if (c < 0x0F99) return false; if (c <= 0x0FAD) return true; | |
1180 | |
1181 if (c < 0x0FB1) return false; if (c <= 0x0FB7) return true; | |
1182 if (c == 0x0FB9) return true; | |
1183 if (c < 0x20D0) return false; if (c <= 0x20DC) return true; | |
1184 if (c == 0x20E1) return true; | |
1185 | |
1186 if (c < 0x302A) return false; if (c <= 0x302F) return true; | |
1187 if (c == 0x3099) return true; | |
1188 if (c == 0x309A) return true; | |
1189 | |
1190 return false; | |
1191 | |
1192 } | |
1193 | |
1194 /** | |
1195 * This is a utility function for determining whether a specified | |
1196 * character is an extender according to production 88 of the XML 1.0 | |
1197 * specification. | |
1198 * | |
1199 * @param c <code>char</code> to check. | |
1200 * @return <code>String</code> true if it's an extender, false otherwise. | |
1201 */ | |
1202 public static boolean isXMLExtender(char c) { | |
1203 | |
1204 if (c < 0x00B6) return false; // quick short circuit | |
1205 | |
1206 // Extenders | |
1207 if (c == 0x00B7) return true; | |
1208 if (c == 0x02D0) return true; | |
1209 if (c == 0x02D1) return true; | |
1210 if (c == 0x0387) return true; | |
1211 if (c == 0x0640) return true; | |
1212 if (c == 0x0E46) return true; | |
1213 if (c == 0x0EC6) return true; | |
1214 if (c == 0x3005) return true; | |
1215 | |
1216 if (c < 0x3031) return false; if (c <= 0x3035) return true; | |
1217 if (c < 0x309D) return false; if (c <= 0x309E) return true; | |
1218 if (c < 0x30FC) return false; if (c <= 0x30FE) return true; | |
1219 | |
1220 return false; | |
1221 | |
1222 } | |
1223 | |
1224 /** | |
1225 * This is a utility function for determining whether a specified | |
1226 * Unicode character | |
1227 * is a digit according to production 88 of the XML 1.0 specification. | |
1228 * | |
1229 * @param c <code>char</code> to check for XML digit compliance | |
1230 * @return <code>boolean</code> true if it's a digit, false otherwise | |
1231 */ | |
1232 public static boolean isXMLDigit(char c) { | |
1233 | |
1234 if (c < 0x0030) return false; if (c <= 0x0039) return true; | |
1235 if (c < 0x0660) return false; if (c <= 0x0669) return true; | |
1236 if (c < 0x06F0) return false; if (c <= 0x06F9) return true; | |
1237 if (c < 0x0966) return false; if (c <= 0x096F) return true; | |
1238 | |
1239 if (c < 0x09E6) return false; if (c <= 0x09EF) return true; | |
1240 if (c < 0x0A66) return false; if (c <= 0x0A6F) return true; | |
1241 if (c < 0x0AE6) return false; if (c <= 0x0AEF) return true; | |
1242 | |
1243 if (c < 0x0B66) return false; if (c <= 0x0B6F) return true; | |
1244 if (c < 0x0BE7) return false; if (c <= 0x0BEF) return true; | |
1245 if (c < 0x0C66) return false; if (c <= 0x0C6F) return true; | |
1246 | |
1247 if (c < 0x0CE6) return false; if (c <= 0x0CEF) return true; | |
1248 if (c < 0x0D66) return false; if (c <= 0x0D6F) return true; | |
1249 if (c < 0x0E50) return false; if (c <= 0x0E59) return true; | |
1250 | |
1251 if (c < 0x0ED0) return false; if (c <= 0x0ED9) return true; | |
1252 if (c < 0x0F20) return false; if (c <= 0x0F29) return true; | |
1253 | |
1254 return false; | |
1255 } | |
1256 | |
1257 /** | |
1258 * This is a utility function for determining whether a specified | |
1259 * Unicode character is a whitespace character according to production 3 | |
1260 * of the XML 1.0 specification. | |
1261 * | |
1262 * @param c <code>char</code> to check for XML whitespace compliance | |
1263 * @return <code>boolean</code> true if it's a whitespace, false otherwise | |
1264 */ | |
1265 public static boolean isXMLWhitespace(char c) { | |
1266 if (c==' ' || c=='\n' || c=='\t' || c=='\r' ){ | |
1267 return true; | |
1268 } | |
1269 return false; | |
1270 } | |
1271 } |