Mercurial > repos > pfrommolt > ngsrich
comparison NGSrich_0.5.5/src/org/jdom/Verifier.java @ 0:89ad0a9cca52 default tip
Uploaded
| author | pfrommolt |
|---|---|
| date | Mon, 21 Nov 2011 08:12:19 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:89ad0a9cca52 |
|---|---|
| 1 /*-- | |
| 2 | |
| 3 $Id: Verifier.java,v 1.57 2009/07/23 05:54:23 jhunter Exp $ | |
| 4 | |
| 5 Copyright (C) 2000-2007 Jason Hunter & Brett McLaughlin. | |
| 6 All rights reserved. | |
| 7 | |
| 8 Redistribution and use in source and binary forms, with or without | |
| 9 modification, are permitted provided that the following conditions | |
| 10 are met: | |
| 11 | |
| 12 1. Redistributions of source code must retain the above copyright | |
| 13 notice, this list of conditions, and the following disclaimer. | |
| 14 | |
| 15 2. Redistributions in binary form must reproduce the above copyright | |
| 16 notice, this list of conditions, and the disclaimer that follows | |
| 17 these conditions in the documentation and/or other materials | |
| 18 provided with the distribution. | |
| 19 | |
| 20 3. The name "JDOM" must not be used to endorse or promote products | |
| 21 derived from this software without prior written permission. For | |
| 22 written permission, please contact <request_AT_jdom_DOT_org>. | |
| 23 | |
| 24 4. Products derived from this software may not be called "JDOM", nor | |
| 25 may "JDOM" appear in their name, without prior written permission | |
| 26 from the JDOM Project Management <request_AT_jdom_DOT_org>. | |
| 27 | |
| 28 In addition, we request (but do not require) that you include in the | |
| 29 end-user documentation provided with the redistribution and/or in the | |
| 30 software itself an acknowledgement equivalent to the following: | |
| 31 "This product includes software developed by the | |
| 32 JDOM Project (http://www.jdom.org/)." | |
| 33 Alternatively, the acknowledgment may be graphical using the logos | |
| 34 available at http://www.jdom.org/images/logos. | |
| 35 | |
| 36 THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED | |
| 37 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |
| 38 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| 39 DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT | |
| 40 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 41 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
| 42 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF | |
| 43 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | |
| 44 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 45 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT | |
| 46 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 47 SUCH DAMAGE. | |
| 48 | |
| 49 This software consists of voluntary contributions made by many | |
| 50 individuals on behalf of the JDOM Project and was originally | |
| 51 created by Jason Hunter <jhunter_AT_jdom_DOT_org> and | |
| 52 Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information | |
| 53 on the JDOM Project, please see <http://www.jdom.org/>. | |
| 54 | |
| 55 */ | |
| 56 | |
| 57 package org.jdom; | |
| 58 | |
| 59 import java.util.*; | |
| 60 | |
| 61 /** | |
| 62 * A utility class to handle well-formedness checks on names, data, and other | |
| 63 * verification tasks for JDOM. The class is final and may not be subclassed. | |
| 64 * | |
| 65 * @version $Revision: 1.57 $, $Date: 2009/07/23 05:54:23 $ | |
| 66 * @author Brett McLaughlin | |
| 67 * @author Elliotte Rusty Harold | |
| 68 * @author Jason Hunter | |
| 69 * @author Bradley S. Huffman | |
| 70 */ | |
| 71 final public class Verifier { | |
| 72 | |
| 73 private static final String CVS_ID = | |
| 74 "@(#) $RCSfile: Verifier.java,v $ $Revision: 1.57 $ $Date: 2009/07/23 05:54:23 $ $Name: jdom_1_1_1 $"; | |
| 75 | |
| 76 /** | |
| 77 * Ensure instantiation cannot occur. | |
| 78 */ | |
| 79 private Verifier() { } | |
| 80 | |
| 81 /** | |
| 82 * This will check the supplied name to see if it is legal for use as | |
| 83 * a JDOM <code>{@link Element}</code> name. | |
| 84 * | |
| 85 * @param name <code>String</code> name to check. | |
| 86 * @return <code>String</code> reason name is illegal, or | |
| 87 * <code>null</code> if name is OK. | |
| 88 */ | |
| 89 public static String checkElementName(String name) { | |
| 90 // Check basic XML name rules first | |
| 91 String reason; | |
| 92 if ((reason = checkXMLName(name)) != null) { | |
| 93 return reason; | |
| 94 } | |
| 95 | |
| 96 // No colons allowed, since elements handle this internally | |
| 97 if (name.indexOf(":") != -1) { | |
| 98 return "Element names cannot contain colons"; | |
| 99 } | |
| 100 | |
| 101 // If we got here, everything is OK | |
| 102 return null; | |
| 103 } | |
| 104 | |
| 105 /** | |
| 106 * This will check the supplied name to see if it is legal for use as | |
| 107 * a JDOM <code>{@link Attribute}</code> name. | |
| 108 * | |
| 109 * @param name <code>String</code> name to check. | |
| 110 * @return <code>String</code> reason name is illegal, or | |
| 111 * <code>null</code> if name is OK. | |
| 112 */ | |
| 113 public static String checkAttributeName(String name) { | |
| 114 // Check basic XML name rules first | |
| 115 String reason; | |
| 116 if ((reason = checkXMLName(name)) != null) { | |
| 117 return reason; | |
| 118 } | |
| 119 | |
| 120 // No colons are allowed, since attributes handle this internally | |
| 121 if (name.indexOf(":") != -1) { | |
| 122 return "Attribute names cannot contain colons"; | |
| 123 } | |
| 124 | |
| 125 // Attribute names may not be xmlns since we do this internally too | |
| 126 if (name.equals("xmlns")) { | |
| 127 return "An Attribute name may not be \"xmlns\"; " + | |
| 128 "use the Namespace class to manage namespaces"; | |
| 129 } | |
| 130 | |
| 131 // If we got here, everything is OK | |
| 132 return null; | |
| 133 } | |
| 134 | |
| 135 /** | |
| 136 * This will check the supplied string to see if it only contains | |
| 137 * characters allowed by the XML 1.0 specification. The C0 controls | |
| 138 * (e.g. null, vertical tab, formfeed, etc.) are specifically excluded | |
| 139 * except for carriage return, linefeed, and the horizontal tab. | |
| 140 * Surrogates are also excluded. | |
| 141 * <p> | |
| 142 * This method is useful for checking element content and attribute | |
| 143 * values. Note that characters | |
| 144 * like " and < are allowed in attribute values and element content. | |
| 145 * They will simply be escaped as " or < | |
| 146 * when the value is serialized. | |
| 147 * </p> | |
| 148 * | |
| 149 * @param text <code>String</code> value to check. | |
| 150 * @return <code>String</code> reason name is illegal, or | |
| 151 * <code>null</code> if name is OK. | |
| 152 */ | |
| 153 public static String checkCharacterData(String text) { | |
| 154 if (text == null) { | |
| 155 return "A null is not a legal XML value"; | |
| 156 } | |
| 157 | |
| 158 // Do check | |
| 159 for (int i = 0, len = text.length(); i<len; i++) { | |
| 160 | |
| 161 int ch = text.charAt(i); | |
| 162 | |
| 163 // Check if high part of a surrogate pair | |
| 164 if (isHighSurrogate((char) ch)) { | |
| 165 // Check if next char is the low-surrogate | |
| 166 i++; | |
| 167 if (i < len) { | |
| 168 char low = text.charAt(i); | |
| 169 if (!isLowSurrogate(low)) { | |
| 170 return "Illegal Surrogate Pair"; | |
| 171 } | |
| 172 // It's a good pair, calculate the true value of | |
| 173 // the character to then fall thru to isXMLCharacter | |
| 174 ch = decodeSurrogatePair((char) ch, low); | |
| 175 } | |
| 176 else { | |
| 177 return "Surrogate Pair Truncated"; | |
| 178 } | |
| 179 } | |
| 180 | |
| 181 if (!isXMLCharacter(ch)) { | |
| 182 // Likely this character can't be easily displayed | |
| 183 // because it's a control so we use it'd hexadecimal | |
| 184 // representation in the reason. | |
| 185 return ("0x" + Integer.toHexString(ch) + | |
| 186 " is not a legal XML character"); | |
| 187 } | |
| 188 } | |
| 189 | |
| 190 // If we got here, everything is OK | |
| 191 return null; | |
| 192 } | |
| 193 | |
| 194 /** | |
| 195 * This will check the supplied data to see if it is legal for use as | |
| 196 * JDOM <code>{@link CDATA}</code>. | |
| 197 * | |
| 198 * @param data <code>String</code> data to check. | |
| 199 * @return <code>String</code> reason data is illegal, or | |
| 200 * <code>null</code> is name is OK. | |
| 201 */ | |
| 202 public static String checkCDATASection(String data) { | |
| 203 String reason = null; | |
| 204 if ((reason = checkCharacterData(data)) != null) { | |
| 205 return reason; | |
| 206 } | |
| 207 | |
| 208 if (data.indexOf("]]>") != -1) { | |
| 209 return "CDATA cannot internally contain a CDATA ending " + | |
| 210 "delimiter (]]>)"; | |
| 211 } | |
| 212 | |
| 213 // If we got here, everything is OK | |
| 214 return null; | |
| 215 } | |
| 216 | |
| 217 /** | |
| 218 * This will check the supplied name to see if it is legal for use as | |
| 219 * a JDOM <code>{@link Namespace}</code> prefix. | |
| 220 * | |
| 221 * @param prefix <code>String</code> prefix to check. | |
| 222 * @return <code>String</code> reason name is illegal, or | |
| 223 * <code>null</code> if name is OK. | |
| 224 */ | |
| 225 public static String checkNamespacePrefix(String prefix) { | |
| 226 // Manually do rules, since URIs can be null or empty | |
| 227 if ((prefix == null) || (prefix.equals(""))) { | |
| 228 return null; | |
| 229 } | |
| 230 | |
| 231 // Cannot start with a number | |
| 232 char first = prefix.charAt(0); | |
| 233 if (isXMLDigit(first)) { | |
| 234 return "Namespace prefixes cannot begin with a number"; | |
| 235 } | |
| 236 // Cannot start with a $ | |
| 237 if (first == '$') { | |
| 238 return "Namespace prefixes cannot begin with a dollar sign ($)"; | |
| 239 } | |
| 240 // Cannot start with a - | |
| 241 if (first == '-') { | |
| 242 return "Namespace prefixes cannot begin with a hyphen (-)"; | |
| 243 } | |
| 244 // Cannot start with a . | |
| 245 if (first == '.') { | |
| 246 return "Namespace prefixes cannot begin with a period (.)"; | |
| 247 } | |
| 248 // Cannot start with "xml" in any character case | |
| 249 if (prefix.toLowerCase().startsWith("xml")) { | |
| 250 return "Namespace prefixes cannot begin with " + | |
| 251 "\"xml\" in any combination of case"; | |
| 252 } | |
| 253 | |
| 254 // Ensure legal content | |
| 255 for (int i=0, len = prefix.length(); i<len; i++) { | |
| 256 char c = prefix.charAt(i); | |
| 257 if (!isXMLNameCharacter(c)) { | |
| 258 return "Namespace prefixes cannot contain the character \"" + | |
| 259 c + "\""; | |
| 260 } | |
| 261 } | |
| 262 | |
| 263 // No colons allowed | |
| 264 if (prefix.indexOf(":") != -1) { | |
| 265 return "Namespace prefixes cannot contain colons"; | |
| 266 } | |
| 267 | |
| 268 // If we got here, everything is OK | |
| 269 return null; | |
| 270 } | |
| 271 | |
| 272 /** | |
| 273 * This will check the supplied name to see if it is legal for use as | |
| 274 * a JDOM <code>{@link Namespace}</code> URI. | |
| 275 * | |
| 276 * @param uri <code>String</code> URI to check. | |
| 277 * @return <code>String</code> reason name is illegal, or | |
| 278 * <code>null</code> if name is OK. | |
| 279 */ | |
| 280 public static String checkNamespaceURI(String uri) { | |
| 281 // Manually do rules, since URIs can be null or empty | |
| 282 if ((uri == null) || (uri.equals(""))) { | |
| 283 return null; | |
| 284 } | |
| 285 | |
| 286 // Cannot start with a number | |
| 287 char first = uri.charAt(0); | |
| 288 if (Character.isDigit(first)) { | |
| 289 return "Namespace URIs cannot begin with a number"; | |
| 290 } | |
| 291 // Cannot start with a $ | |
| 292 if (first == '$') { | |
| 293 return "Namespace URIs cannot begin with a dollar sign ($)"; | |
| 294 } | |
| 295 // Cannot start with a - | |
| 296 if (first == '-') { | |
| 297 return "Namespace URIs cannot begin with a hyphen (-)"; | |
| 298 } | |
| 299 | |
| 300 // If we got here, everything is OK | |
| 301 return null; | |
| 302 } | |
| 303 | |
| 304 /** | |
| 305 * Check if two namespaces collide. | |
| 306 * | |
| 307 * @param namespace <code>Namespace</code> to check. | |
| 308 * @param other <code>Namespace</code> to check against. | |
| 309 * @return <code>String</code> reason for collision, or | |
| 310 * <code>null</code> if no collision. | |
| 311 */ | |
| 312 public static String checkNamespaceCollision(Namespace namespace, | |
| 313 Namespace other) { | |
| 314 String p1,p2,u1,u2,reason; | |
| 315 | |
| 316 reason = null; | |
| 317 p1 = namespace.getPrefix(); | |
| 318 u1 = namespace.getURI(); | |
| 319 p2 = other.getPrefix(); | |
| 320 u2 = other.getURI(); | |
| 321 if (p1.equals(p2) && !u1.equals(u2)) { | |
| 322 reason = "The namespace prefix \"" + p1 + "\" collides"; | |
| 323 } | |
| 324 return reason; | |
| 325 } | |
| 326 | |
| 327 /** | |
| 328 * Check if <code>{@link Attribute}</code>'s namespace collides with a | |
| 329 * <code>{@link Element}</code>'s namespace. | |
| 330 * | |
| 331 * @param attribute <code>Attribute</code> to check. | |
| 332 * @param element <code>Element</code> to check against. | |
| 333 * @return <code>String</code> reason for collision, or | |
| 334 * <code>null</code> if no collision. | |
| 335 */ | |
| 336 public static String checkNamespaceCollision(Attribute attribute, | |
| 337 Element element) { | |
| 338 Namespace namespace = attribute.getNamespace(); | |
| 339 String prefix = namespace.getPrefix(); | |
| 340 if ("".equals(prefix)) { | |
| 341 return null; | |
| 342 } | |
| 343 | |
| 344 return checkNamespaceCollision(namespace, element); | |
| 345 } | |
| 346 | |
| 347 /** | |
| 348 * Check if a <code>{@link Namespace}</code> collides with a | |
| 349 * <code>{@link Element}</code>'s namespace. | |
| 350 * | |
| 351 * @param namespace <code>Namespace</code> to check. | |
| 352 * @param element <code>Element</code> to check against. | |
| 353 * @return <code>String</code> reason for collision, or | |
| 354 * <code>null</code> if no collision. | |
| 355 */ | |
| 356 public static String checkNamespaceCollision(Namespace namespace, | |
| 357 Element element) { | |
| 358 String reason = checkNamespaceCollision(namespace, | |
| 359 element.getNamespace()); | |
| 360 if (reason != null) { | |
| 361 return reason + " with the element namespace prefix"; | |
| 362 } | |
| 363 | |
| 364 reason = checkNamespaceCollision(namespace, | |
| 365 element.getAdditionalNamespaces()); | |
| 366 if (reason != null) { | |
| 367 return reason; | |
| 368 } | |
| 369 | |
| 370 reason = checkNamespaceCollision(namespace, element.getAttributes()); | |
| 371 if (reason != null) { | |
| 372 return reason; | |
| 373 } | |
| 374 | |
| 375 return null; | |
| 376 } | |
| 377 | |
| 378 /** | |
| 379 * Check if a <code>{@link Namespace}</code> collides with a | |
| 380 * <code>{@link Attribute}</code>'s namespace. | |
| 381 * | |
| 382 * @param namespace <code>Namespace</code> to check. | |
| 383 * @param attribute <code>Attribute</code> to check against. | |
| 384 * @return <code>String</code> reason for collision, or | |
| 385 * <code>null</code> if no collision. | |
| 386 */ | |
| 387 public static String checkNamespaceCollision(Namespace namespace, | |
| 388 Attribute attribute) { | |
| 389 String reason = null; | |
| 390 if (!attribute.getNamespace().equals(Namespace.NO_NAMESPACE)) { | |
| 391 reason = checkNamespaceCollision(namespace, | |
| 392 attribute.getNamespace()); | |
| 393 if (reason != null) { | |
| 394 reason += " with an attribute namespace prefix on the element"; | |
| 395 } | |
| 396 } | |
| 397 return reason; | |
| 398 } | |
| 399 | |
| 400 /** | |
| 401 * Check if a <code>{@link Namespace}</code> collides with any namespace | |
| 402 * from a list of objects. | |
| 403 * | |
| 404 * @param namespace <code>Namespace</code> to check. | |
| 405 * @param list <code>List</code> to check against. | |
| 406 * @return <code>String</code> reason for collision, or | |
| 407 * <code>null</code> if no collision. | |
| 408 */ | |
| 409 public static String checkNamespaceCollision(Namespace namespace, | |
| 410 List list) { | |
| 411 if (list == null) { | |
| 412 return null; | |
| 413 } | |
| 414 | |
| 415 String reason = null; | |
| 416 Iterator i = list.iterator(); | |
| 417 while ((reason == null) && i.hasNext()) { | |
| 418 Object obj = i.next(); | |
| 419 if (obj instanceof Attribute) { | |
| 420 reason = checkNamespaceCollision(namespace, (Attribute) obj); | |
| 421 } | |
| 422 else if (obj instanceof Element) { | |
| 423 reason = checkNamespaceCollision(namespace, (Element) obj); | |
| 424 } | |
| 425 else if (obj instanceof Namespace) { | |
| 426 reason = checkNamespaceCollision(namespace, (Namespace) obj); | |
| 427 if (reason != null) { | |
| 428 reason += " with an additional namespace declared" + | |
| 429 " by the element"; | |
| 430 } | |
| 431 } | |
| 432 } | |
| 433 return reason; | |
| 434 } | |
| 435 | |
| 436 /** | |
| 437 * This will check the supplied data to see if it is legal for use as | |
| 438 * a JDOM <code>{@link ProcessingInstruction}</code> target. | |
| 439 * | |
| 440 * @param target <code>String</code> target to check. | |
| 441 * @return <code>String</code> reason target is illegal, or | |
| 442 * <code>null</code> if target is OK. | |
| 443 */ | |
| 444 public static String checkProcessingInstructionTarget(String target) { | |
| 445 // Check basic XML name rules first | |
| 446 String reason; | |
| 447 if ((reason = checkXMLName(target)) != null) { | |
| 448 return reason; | |
| 449 } | |
| 450 | |
| 451 // No colons allowed, per Namespace Specification Section 6 | |
| 452 if (target.indexOf(":") != -1) { | |
| 453 return "Processing instruction targets cannot contain colons"; | |
| 454 } | |
| 455 | |
| 456 // Cannot begin with 'xml' in any case | |
| 457 if (target.equalsIgnoreCase("xml")) { | |
| 458 return "Processing instructions cannot have a target of " + | |
| 459 "\"xml\" in any combination of case. (Note that the " + | |
| 460 "\"<?xml ... ?>\" declaration at the beginning of a " + | |
| 461 "document is not a processing instruction and should not " + | |
| 462 "be added as one; it is written automatically during " + | |
| 463 "output, e.g. by XMLOutputter.)"; | |
| 464 } | |
| 465 | |
| 466 // If we got here, everything is OK | |
| 467 return null; | |
| 468 } | |
| 469 | |
| 470 /** | |
| 471 * This will check the supplied data to see if it is legal for use as | |
| 472 * <code>{@link ProcessingInstruction}</code> data. Besides checking that | |
| 473 * all the characters are allowed in XML, this also checks | |
| 474 * that the data does not contain the PI end-string "?>". | |
| 475 * | |
| 476 * @param data <code>String</code> data to check. | |
| 477 * @return <code>String</code> reason data is illegal, or | |
| 478 * <code>null</code> if data is OK. | |
| 479 */ | |
| 480 public static String checkProcessingInstructionData(String data) { | |
| 481 // Check basic XML name rules first | |
| 482 String reason = checkCharacterData(data); | |
| 483 | |
| 484 if (reason == null) { | |
| 485 if (data.indexOf("?>") >= 0) { | |
| 486 return "Processing instructions cannot contain " + | |
| 487 "the string \"?>\""; | |
| 488 } | |
| 489 } | |
| 490 | |
| 491 return reason; | |
| 492 } | |
| 493 | |
| 494 /** | |
| 495 * This will check the supplied data to see if it is legal for use as | |
| 496 * JDOM <code>{@link Comment}</code> data. | |
| 497 * | |
| 498 * @param data <code>String</code> data to check. | |
| 499 * @return <code>String</code> reason data is illegal, or | |
| 500 * <code>null</code> if data is OK. | |
| 501 */ | |
| 502 public static String checkCommentData(String data) { | |
| 503 String reason = null; | |
| 504 if ((reason = checkCharacterData(data)) != null) { | |
| 505 return reason; | |
| 506 } | |
| 507 | |
| 508 if (data.indexOf("--") != -1) { | |
| 509 return "Comments cannot contain double hyphens (--)"; | |
| 510 } | |
| 511 if (data.endsWith("-")) { | |
| 512 return "Comment data cannot end with a hyphen."; | |
| 513 } | |
| 514 | |
| 515 // If we got here, everything is OK | |
| 516 return null; | |
| 517 } | |
| 518 /** | |
| 519 * This is a utility function to decode a non-BMP | |
| 520 * UTF-16 surrogate pair. | |
| 521 * @param high high 16 bits | |
| 522 * @param low low 16 bits | |
| 523 * @return decoded character | |
| 524 */ | |
| 525 public static int decodeSurrogatePair(char high, char low) { | |
| 526 return 0x10000 + (high - 0xD800) * 0x400 + (low - 0xDC00); | |
| 527 } | |
| 528 | |
| 529 // [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | | |
| 530 // [-'()+,./:=?;*#@$_%] | |
| 531 public static boolean isXMLPublicIDCharacter(char c) { | |
| 532 | |
| 533 if (c >= 'a' && c <= 'z') return true; | |
| 534 if (c >= '?' && c <= 'Z') return true; | |
| 535 if (c >= '\'' && c <= ';') return true; | |
| 536 | |
| 537 if (c == ' ') return true; | |
| 538 if (c == '!') return true; | |
| 539 if (c == '=') return true; | |
| 540 if (c == '#') return true; | |
| 541 if (c == '$') return true; | |
| 542 if (c == '_') return true; | |
| 543 if (c == '%') return true; | |
| 544 if (c == '\n') return true; | |
| 545 if (c == '\r') return true; | |
| 546 if (c == '\t') return true; | |
| 547 | |
| 548 return false; | |
| 549 } | |
| 550 | |
| 551 /** | |
| 552 * This will ensure that the data for a public identifier | |
| 553 * is legal. | |
| 554 * | |
| 555 * @param publicID <code>String</code> public ID to check. | |
| 556 * @return <code>String</code> reason public ID is illegal, or | |
| 557 * <code>null</code> if public ID is OK. | |
| 558 */ | |
| 559 public static String checkPublicID(String publicID) { | |
| 560 String reason = null; | |
| 561 | |
| 562 if (publicID == null) return null; | |
| 563 // This indicates there is no public ID | |
| 564 | |
| 565 for (int i = 0; i < publicID.length(); i++) { | |
| 566 char c = publicID.charAt(i); | |
| 567 if (!isXMLPublicIDCharacter(c)) { | |
| 568 reason = c + " is not a legal character in public IDs"; | |
| 569 break; | |
| 570 } | |
| 571 } | |
| 572 | |
| 573 return reason; | |
| 574 } | |
| 575 | |
| 576 | |
| 577 /** | |
| 578 * This will ensure that the data for a system literal | |
| 579 * is legal. | |
| 580 * | |
| 581 * @param systemLiteral <code>String</code> system literal to check. | |
| 582 * @return <code>String</code> reason system literal is illegal, or | |
| 583 * <code>null</code> if system literal is OK. | |
| 584 */ | |
| 585 public static String checkSystemLiteral(String systemLiteral) { | |
| 586 String reason = null; | |
| 587 | |
| 588 if (systemLiteral == null) return null; | |
| 589 // This indicates there is no system ID | |
| 590 | |
| 591 if (systemLiteral.indexOf('\'') != -1 | |
| 592 && systemLiteral.indexOf('"') != -1) { | |
| 593 reason = | |
| 594 "System literals cannot simultaneously contain both single and double quotes."; | |
| 595 } | |
| 596 else { | |
| 597 reason = checkCharacterData(systemLiteral); | |
| 598 } | |
| 599 | |
| 600 return reason; | |
| 601 } | |
| 602 | |
| 603 /** | |
| 604 * This is a utility function for sharing the base process of checking | |
| 605 * any XML name. | |
| 606 * | |
| 607 * @param name <code>String</code> to check for XML name compliance. | |
| 608 * @return <code>String</code> reason the name is illegal, or | |
| 609 * <code>null</code> if OK. | |
| 610 */ | |
| 611 public static String checkXMLName(String name) { | |
| 612 // Cannot be empty or null | |
| 613 if ((name == null) || (name.length() == 0) | |
| 614 || (name.trim().equals(""))) { | |
| 615 return "XML names cannot be null or empty"; | |
| 616 } | |
| 617 | |
| 618 | |
| 619 // Cannot start with a number | |
| 620 char first = name.charAt(0); | |
| 621 if (!isXMLNameStartCharacter(first)) { | |
| 622 return "XML names cannot begin with the character \"" + | |
| 623 first + "\""; | |
| 624 } | |
| 625 // Ensure legal content for non-first chars | |
| 626 for (int i=1, len = name.length(); i<len; i++) { | |
| 627 char c = name.charAt(i); | |
| 628 if (!isXMLNameCharacter(c)) { | |
| 629 return "XML names cannot contain the character \"" + c + "\""; | |
| 630 } | |
| 631 } | |
| 632 | |
| 633 // We got here, so everything is OK | |
| 634 return null; | |
| 635 } | |
| 636 | |
| 637 /** | |
| 638 * <p> | |
| 639 * Checks a string to see if it is a legal RFC 2396 URI. | |
| 640 * Both absolute and relative URIs are supported. | |
| 641 * </p> | |
| 642 * | |
| 643 * @param uri <code>String</code> to check. | |
| 644 * @return <code>String</code> reason the URI is illegal, or | |
| 645 * <code>null</code> if OK. | |
| 646 */ | |
| 647 public static String checkURI(String uri) { | |
| 648 // URIs can be null or empty | |
| 649 if ((uri == null) || (uri.equals(""))) { | |
| 650 return null; | |
| 651 } | |
| 652 | |
| 653 for (int i = 0; i < uri.length(); i++) { | |
| 654 char test = uri.charAt(i); | |
| 655 if (!isURICharacter(test)) { | |
| 656 String msgNumber = "0x" + Integer.toHexString(test); | |
| 657 if (test <= 0x09) msgNumber = "0x0" + Integer.toHexString(test); | |
| 658 return "URIs cannot contain " + msgNumber; | |
| 659 } // end if | |
| 660 if (test == '%') { // must be followed by two hexadecimal digits | |
| 661 try { | |
| 662 char firstDigit = uri.charAt(i+1); | |
| 663 char secondDigit = uri.charAt(i+2); | |
| 664 if (!isHexDigit(firstDigit) || | |
| 665 !isHexDigit(secondDigit)) { | |
| 666 return "Percent signs in URIs must be followed by " | |
| 667 + "exactly two hexadecimal digits."; | |
| 668 } | |
| 669 | |
| 670 } | |
| 671 catch (StringIndexOutOfBoundsException e) { | |
| 672 return "Percent signs in URIs must be followed by " | |
| 673 + "exactly two hexadecimal digits."; | |
| 674 } | |
| 675 } | |
| 676 } // end for | |
| 677 | |
| 678 // If we got here, everything is OK | |
| 679 return null; | |
| 680 } | |
| 681 | |
| 682 /** | |
| 683 * <p> | |
| 684 * This is a utility function for determining whether a specified | |
| 685 * Unicode character is a hexadecimal digit as defined in RFC 2396; | |
| 686 * that is, one of the ASCII characters 0-9, a-f, or A-F. | |
| 687 * </p> | |
| 688 * | |
| 689 * @param c to check for hex digit. | |
| 690 * @return true if it's allowed, false otherwise. | |
| 691 */ | |
| 692 public static boolean isHexDigit(char c) { | |
| 693 | |
| 694 // I suspect most characters passed to this method will be | |
| 695 // correct hexadecimal digits, so I test for the true cases | |
| 696 // first. If this proves to be a performance bottleneck | |
| 697 // a switch statement or lookup table | |
| 698 // might optimize this. | |
| 699 if (c >= '0' && c <= '9') return true; | |
| 700 if (c >= 'A' && c <= 'F') return true; | |
| 701 if (c >= 'a' && c <= 'f') return true; | |
| 702 | |
| 703 return false; | |
| 704 } | |
| 705 | |
| 706 /** | |
| 707 * This is a function for determining whether the | |
| 708 * specified character is the high 16 bits in a | |
| 709 * UTF-16 surrogate pair. | |
| 710 * @param ch character to check | |
| 711 * @return true if the character is a high surrogate, false otherwise | |
| 712 */ | |
| 713 public static boolean isHighSurrogate(char ch) { | |
| 714 return (ch >= 0xD800 && ch <= 0xDBFF); | |
| 715 } | |
| 716 | |
| 717 /** | |
| 718 * This is a function for determining whether the | |
| 719 * specified character is the low 16 bits in a | |
| 720 * UTF-16 surrogate pair. | |
| 721 * @param ch character to check | |
| 722 * @return true if the character is a low surrogate, false otherwise. | |
| 723 */ | |
| 724 public static boolean isLowSurrogate(char ch) { | |
| 725 return (ch >= 0xDC00 && ch <= 0xDFFF); | |
| 726 } | |
| 727 | |
| 728 /** | |
| 729 * <p> | |
| 730 * This is a utility function for determining whether | |
| 731 * a specified Unicode character is legal in URI references | |
| 732 * as determined by RFC 2396. | |
| 733 * </p> | |
| 734 * | |
| 735 * @param c <code>char</code> to check for URI reference compliance. | |
| 736 * @return true if it's allowed, false otherwise. | |
| 737 */ | |
| 738 public static boolean isURICharacter(char c) { | |
| 739 if (c >= 'a' && c <= 'z') return true; | |
| 740 if (c >= 'A' && c <= 'Z') return true; | |
| 741 if (c >= '0' && c <= '9') return true; | |
| 742 if (c == '/') return true; | |
| 743 if (c == '-') return true; | |
| 744 if (c == '.') return true; | |
| 745 if (c == '?') return true; | |
| 746 if (c == ':') return true; | |
| 747 if (c == '@') return true; | |
| 748 if (c == '&') return true; | |
| 749 if (c == '=') return true; | |
| 750 if (c == '+') return true; | |
| 751 if (c == '$') return true; | |
| 752 if (c == ',') return true; | |
| 753 if (c == '%') return true; | |
| 754 | |
| 755 if (c == '_') return true; | |
| 756 if (c == '!') return true; | |
| 757 if (c == '~') return true; | |
| 758 if (c == '*') return true; | |
| 759 if (c == '\'') return true; | |
| 760 if (c == '(') return true; | |
| 761 if (c == ')') return true; | |
| 762 return false; | |
| 763 } | |
| 764 | |
| 765 /** | |
| 766 * This is a utility function for determining whether a specified | |
| 767 * character is a character according to production 2 of the | |
| 768 * XML 1.0 specification. | |
| 769 * | |
| 770 * @param c <code>char</code> to check for XML compliance | |
| 771 * @return <code>boolean</code> true if it's a character, | |
| 772 * false otherwise | |
| 773 */ | |
| 774 public static boolean isXMLCharacter(int c) { | |
| 775 | |
| 776 if (c == '\n') return true; | |
| 777 if (c == '\r') return true; | |
| 778 if (c == '\t') return true; | |
| 779 | |
| 780 if (c < 0x20) return false; if (c <= 0xD7FF) return true; | |
| 781 if (c < 0xE000) return false; if (c <= 0xFFFD) return true; | |
| 782 if (c < 0x10000) return false; if (c <= 0x10FFFF) return true; | |
| 783 | |
| 784 return false; | |
| 785 } | |
| 786 | |
| 787 | |
| 788 /** | |
| 789 * This is a utility function for determining whether a specified | |
| 790 * character is a name character according to production 4 of the | |
| 791 * XML 1.0 specification. | |
| 792 * | |
| 793 * @param c <code>char</code> to check for XML name compliance. | |
| 794 * @return <code>boolean</code> true if it's a name character, | |
| 795 * false otherwise. | |
| 796 */ | |
| 797 public static boolean isXMLNameCharacter(char c) { | |
| 798 | |
| 799 return (isXMLLetter(c) || isXMLDigit(c) || c == '.' || c == '-' | |
| 800 || c == '_' || c == ':' || isXMLCombiningChar(c) | |
| 801 || isXMLExtender(c)); | |
| 802 } | |
| 803 | |
| 804 /** | |
| 805 * This is a utility function for determining whether a specified | |
| 806 * character is a legal name start character according to production 5 | |
| 807 * of the XML 1.0 specification. This production does allow names | |
| 808 * to begin with colons which the Namespaces in XML Recommendation | |
| 809 * disallows. | |
| 810 * | |
| 811 * @param c <code>char</code> to check for XML name start compliance. | |
| 812 * @return <code>boolean</code> true if it's a name start character, | |
| 813 * false otherwise. | |
| 814 */ | |
| 815 public static boolean isXMLNameStartCharacter(char c) { | |
| 816 | |
| 817 return (isXMLLetter(c) || c == '_' || c ==':'); | |
| 818 | |
| 819 } | |
| 820 | |
| 821 /** | |
| 822 * This is a utility function for determining whether a specified | |
| 823 * character is a letter or digit according to productions 84 and 88 | |
| 824 * of the XML 1.0 specification. | |
| 825 * | |
| 826 * @param c <code>char</code> to check. | |
| 827 * @return <code>boolean</code> true if it's letter or digit, | |
| 828 * false otherwise. | |
| 829 */ | |
| 830 public static boolean isXMLLetterOrDigit(char c) { | |
| 831 | |
| 832 return (isXMLLetter(c) || isXMLDigit(c)); | |
| 833 | |
| 834 } | |
| 835 | |
| 836 /** | |
| 837 * This is a utility function for determining whether a specified character | |
| 838 * is a letter according to production 84 of the XML 1.0 specification. | |
| 839 * | |
| 840 * @param c <code>char</code> to check for XML name compliance. | |
| 841 * @return <code>String</code> true if it's a letter, false otherwise. | |
| 842 */ | |
| 843 public static boolean isXMLLetter(char c) { | |
| 844 // Note that order is very important here. The search proceeds | |
| 845 // from lowest to highest values, so that no searching occurs | |
| 846 // above the character's value. BTW, the first line is equivalent to: | |
| 847 // if (c >= 0x0041 && c <= 0x005A) return true; | |
| 848 | |
| 849 if (c < 0x0041) return false; if (c <= 0x005a) return true; | |
| 850 if (c < 0x0061) return false; if (c <= 0x007A) return true; | |
| 851 if (c < 0x00C0) return false; if (c <= 0x00D6) return true; | |
| 852 if (c < 0x00D8) return false; if (c <= 0x00F6) return true; | |
| 853 if (c < 0x00F8) return false; if (c <= 0x00FF) return true; | |
| 854 if (c < 0x0100) return false; if (c <= 0x0131) return true; | |
| 855 if (c < 0x0134) return false; if (c <= 0x013E) return true; | |
| 856 if (c < 0x0141) return false; if (c <= 0x0148) return true; | |
| 857 if (c < 0x014A) return false; if (c <= 0x017E) return true; | |
| 858 if (c < 0x0180) return false; if (c <= 0x01C3) return true; | |
| 859 if (c < 0x01CD) return false; if (c <= 0x01F0) return true; | |
| 860 if (c < 0x01F4) return false; if (c <= 0x01F5) return true; | |
| 861 if (c < 0x01FA) return false; if (c <= 0x0217) return true; | |
| 862 if (c < 0x0250) return false; if (c <= 0x02A8) return true; | |
| 863 if (c < 0x02BB) return false; if (c <= 0x02C1) return true; | |
| 864 if (c == 0x0386) return true; | |
| 865 if (c < 0x0388) return false; if (c <= 0x038A) return true; | |
| 866 if (c == 0x038C) return true; | |
| 867 if (c < 0x038E) return false; if (c <= 0x03A1) return true; | |
| 868 if (c < 0x03A3) return false; if (c <= 0x03CE) return true; | |
| 869 if (c < 0x03D0) return false; if (c <= 0x03D6) return true; | |
| 870 if (c == 0x03DA) return true; | |
| 871 if (c == 0x03DC) return true; | |
| 872 if (c == 0x03DE) return true; | |
| 873 if (c == 0x03E0) return true; | |
| 874 if (c < 0x03E2) return false; if (c <= 0x03F3) return true; | |
| 875 if (c < 0x0401) return false; if (c <= 0x040C) return true; | |
| 876 if (c < 0x040E) return false; if (c <= 0x044F) return true; | |
| 877 if (c < 0x0451) return false; if (c <= 0x045C) return true; | |
| 878 if (c < 0x045E) return false; if (c <= 0x0481) return true; | |
| 879 if (c < 0x0490) return false; if (c <= 0x04C4) return true; | |
| 880 if (c < 0x04C7) return false; if (c <= 0x04C8) return true; | |
| 881 if (c < 0x04CB) return false; if (c <= 0x04CC) return true; | |
| 882 if (c < 0x04D0) return false; if (c <= 0x04EB) return true; | |
| 883 if (c < 0x04EE) return false; if (c <= 0x04F5) return true; | |
| 884 if (c < 0x04F8) return false; if (c <= 0x04F9) return true; | |
| 885 if (c < 0x0531) return false; if (c <= 0x0556) return true; | |
| 886 if (c == 0x0559) return true; | |
| 887 if (c < 0x0561) return false; if (c <= 0x0586) return true; | |
| 888 if (c < 0x05D0) return false; if (c <= 0x05EA) return true; | |
| 889 if (c < 0x05F0) return false; if (c <= 0x05F2) return true; | |
| 890 if (c < 0x0621) return false; if (c <= 0x063A) return true; | |
| 891 if (c < 0x0641) return false; if (c <= 0x064A) return true; | |
| 892 if (c < 0x0671) return false; if (c <= 0x06B7) return true; | |
| 893 if (c < 0x06BA) return false; if (c <= 0x06BE) return true; | |
| 894 if (c < 0x06C0) return false; if (c <= 0x06CE) return true; | |
| 895 if (c < 0x06D0) return false; if (c <= 0x06D3) return true; | |
| 896 if (c == 0x06D5) return true; | |
| 897 if (c < 0x06E5) return false; if (c <= 0x06E6) return true; | |
| 898 if (c < 0x0905) return false; if (c <= 0x0939) return true; | |
| 899 if (c == 0x093D) return true; | |
| 900 if (c < 0x0958) return false; if (c <= 0x0961) return true; | |
| 901 if (c < 0x0985) return false; if (c <= 0x098C) return true; | |
| 902 if (c < 0x098F) return false; if (c <= 0x0990) return true; | |
| 903 if (c < 0x0993) return false; if (c <= 0x09A8) return true; | |
| 904 if (c < 0x09AA) return false; if (c <= 0x09B0) return true; | |
| 905 if (c == 0x09B2) return true; | |
| 906 if (c < 0x09B6) return false; if (c <= 0x09B9) return true; | |
| 907 if (c < 0x09DC) return false; if (c <= 0x09DD) return true; | |
| 908 if (c < 0x09DF) return false; if (c <= 0x09E1) return true; | |
| 909 if (c < 0x09F0) return false; if (c <= 0x09F1) return true; | |
| 910 if (c < 0x0A05) return false; if (c <= 0x0A0A) return true; | |
| 911 if (c < 0x0A0F) return false; if (c <= 0x0A10) return true; | |
| 912 if (c < 0x0A13) return false; if (c <= 0x0A28) return true; | |
| 913 if (c < 0x0A2A) return false; if (c <= 0x0A30) return true; | |
| 914 if (c < 0x0A32) return false; if (c <= 0x0A33) return true; | |
| 915 if (c < 0x0A35) return false; if (c <= 0x0A36) return true; | |
| 916 if (c < 0x0A38) return false; if (c <= 0x0A39) return true; | |
| 917 if (c < 0x0A59) return false; if (c <= 0x0A5C) return true; | |
| 918 if (c == 0x0A5E) return true; | |
| 919 if (c < 0x0A72) return false; if (c <= 0x0A74) return true; | |
| 920 if (c < 0x0A85) return false; if (c <= 0x0A8B) return true; | |
| 921 if (c == 0x0A8D) return true; | |
| 922 if (c < 0x0A8F) return false; if (c <= 0x0A91) return true; | |
| 923 if (c < 0x0A93) return false; if (c <= 0x0AA8) return true; | |
| 924 if (c < 0x0AAA) return false; if (c <= 0x0AB0) return true; | |
| 925 if (c < 0x0AB2) return false; if (c <= 0x0AB3) return true; | |
| 926 if (c < 0x0AB5) return false; if (c <= 0x0AB9) return true; | |
| 927 if (c == 0x0ABD) return true; | |
| 928 if (c == 0x0AE0) return true; | |
| 929 if (c < 0x0B05) return false; if (c <= 0x0B0C) return true; | |
| 930 if (c < 0x0B0F) return false; if (c <= 0x0B10) return true; | |
| 931 if (c < 0x0B13) return false; if (c <= 0x0B28) return true; | |
| 932 if (c < 0x0B2A) return false; if (c <= 0x0B30) return true; | |
| 933 if (c < 0x0B32) return false; if (c <= 0x0B33) return true; | |
| 934 if (c < 0x0B36) return false; if (c <= 0x0B39) return true; | |
| 935 if (c == 0x0B3D) return true; | |
| 936 if (c < 0x0B5C) return false; if (c <= 0x0B5D) return true; | |
| 937 if (c < 0x0B5F) return false; if (c <= 0x0B61) return true; | |
| 938 if (c < 0x0B85) return false; if (c <= 0x0B8A) return true; | |
| 939 if (c < 0x0B8E) return false; if (c <= 0x0B90) return true; | |
| 940 if (c < 0x0B92) return false; if (c <= 0x0B95) return true; | |
| 941 if (c < 0x0B99) return false; if (c <= 0x0B9A) return true; | |
| 942 if (c == 0x0B9C) return true; | |
| 943 if (c < 0x0B9E) return false; if (c <= 0x0B9F) return true; | |
| 944 if (c < 0x0BA3) return false; if (c <= 0x0BA4) return true; | |
| 945 if (c < 0x0BA8) return false; if (c <= 0x0BAA) return true; | |
| 946 if (c < 0x0BAE) return false; if (c <= 0x0BB5) return true; | |
| 947 if (c < 0x0BB7) return false; if (c <= 0x0BB9) return true; | |
| 948 if (c < 0x0C05) return false; if (c <= 0x0C0C) return true; | |
| 949 if (c < 0x0C0E) return false; if (c <= 0x0C10) return true; | |
| 950 if (c < 0x0C12) return false; if (c <= 0x0C28) return true; | |
| 951 if (c < 0x0C2A) return false; if (c <= 0x0C33) return true; | |
| 952 if (c < 0x0C35) return false; if (c <= 0x0C39) return true; | |
| 953 if (c < 0x0C60) return false; if (c <= 0x0C61) return true; | |
| 954 if (c < 0x0C85) return false; if (c <= 0x0C8C) return true; | |
| 955 if (c < 0x0C8E) return false; if (c <= 0x0C90) return true; | |
| 956 if (c < 0x0C92) return false; if (c <= 0x0CA8) return true; | |
| 957 if (c < 0x0CAA) return false; if (c <= 0x0CB3) return true; | |
| 958 if (c < 0x0CB5) return false; if (c <= 0x0CB9) return true; | |
| 959 if (c == 0x0CDE) return true; | |
| 960 if (c < 0x0CE0) return false; if (c <= 0x0CE1) return true; | |
| 961 if (c < 0x0D05) return false; if (c <= 0x0D0C) return true; | |
| 962 if (c < 0x0D0E) return false; if (c <= 0x0D10) return true; | |
| 963 if (c < 0x0D12) return false; if (c <= 0x0D28) return true; | |
| 964 if (c < 0x0D2A) return false; if (c <= 0x0D39) return true; | |
| 965 if (c < 0x0D60) return false; if (c <= 0x0D61) return true; | |
| 966 if (c < 0x0E01) return false; if (c <= 0x0E2E) return true; | |
| 967 if (c == 0x0E30) return true; | |
| 968 if (c < 0x0E32) return false; if (c <= 0x0E33) return true; | |
| 969 if (c < 0x0E40) return false; if (c <= 0x0E45) return true; | |
| 970 if (c < 0x0E81) return false; if (c <= 0x0E82) return true; | |
| 971 if (c == 0x0E84) return true; | |
| 972 if (c < 0x0E87) return false; if (c <= 0x0E88) return true; | |
| 973 if (c == 0x0E8A) return true; | |
| 974 if (c == 0x0E8D) return true; | |
| 975 if (c < 0x0E94) return false; if (c <= 0x0E97) return true; | |
| 976 if (c < 0x0E99) return false; if (c <= 0x0E9F) return true; | |
| 977 if (c < 0x0EA1) return false; if (c <= 0x0EA3) return true; | |
| 978 if (c == 0x0EA5) return true; | |
| 979 if (c == 0x0EA7) return true; | |
| 980 if (c < 0x0EAA) return false; if (c <= 0x0EAB) return true; | |
| 981 if (c < 0x0EAD) return false; if (c <= 0x0EAE) return true; | |
| 982 if (c == 0x0EB0) return true; | |
| 983 if (c < 0x0EB2) return false; if (c <= 0x0EB3) return true; | |
| 984 if (c == 0x0EBD) return true; | |
| 985 if (c < 0x0EC0) return false; if (c <= 0x0EC4) return true; | |
| 986 if (c < 0x0F40) return false; if (c <= 0x0F47) return true; | |
| 987 if (c < 0x0F49) return false; if (c <= 0x0F69) return true; | |
| 988 if (c < 0x10A0) return false; if (c <= 0x10C5) return true; | |
| 989 if (c < 0x10D0) return false; if (c <= 0x10F6) return true; | |
| 990 if (c == 0x1100) return true; | |
| 991 if (c < 0x1102) return false; if (c <= 0x1103) return true; | |
| 992 if (c < 0x1105) return false; if (c <= 0x1107) return true; | |
| 993 if (c == 0x1109) return true; | |
| 994 if (c < 0x110B) return false; if (c <= 0x110C) return true; | |
| 995 if (c < 0x110E) return false; if (c <= 0x1112) return true; | |
| 996 if (c == 0x113C) return true; | |
| 997 if (c == 0x113E) return true; | |
| 998 if (c == 0x1140) return true; | |
| 999 if (c == 0x114C) return true; | |
| 1000 if (c == 0x114E) return true; | |
| 1001 if (c == 0x1150) return true; | |
| 1002 if (c < 0x1154) return false; if (c <= 0x1155) return true; | |
| 1003 if (c == 0x1159) return true; | |
| 1004 if (c < 0x115F) return false; if (c <= 0x1161) return true; | |
| 1005 if (c == 0x1163) return true; | |
| 1006 if (c == 0x1165) return true; | |
| 1007 if (c == 0x1167) return true; | |
| 1008 if (c == 0x1169) return true; | |
| 1009 if (c < 0x116D) return false; if (c <= 0x116E) return true; | |
| 1010 if (c < 0x1172) return false; if (c <= 0x1173) return true; | |
| 1011 if (c == 0x1175) return true; | |
| 1012 if (c == 0x119E) return true; | |
| 1013 if (c == 0x11A8) return true; | |
| 1014 if (c == 0x11AB) return true; | |
| 1015 if (c < 0x11AE) return false; if (c <= 0x11AF) return true; | |
| 1016 if (c < 0x11B7) return false; if (c <= 0x11B8) return true; | |
| 1017 if (c == 0x11BA) return true; | |
| 1018 if (c < 0x11BC) return false; if (c <= 0x11C2) return true; | |
| 1019 if (c == 0x11EB) return true; | |
| 1020 if (c == 0x11F0) return true; | |
| 1021 if (c == 0x11F9) return true; | |
| 1022 if (c < 0x1E00) return false; if (c <= 0x1E9B) return true; | |
| 1023 if (c < 0x1EA0) return false; if (c <= 0x1EF9) return true; | |
| 1024 if (c < 0x1F00) return false; if (c <= 0x1F15) return true; | |
| 1025 if (c < 0x1F18) return false; if (c <= 0x1F1D) return true; | |
| 1026 if (c < 0x1F20) return false; if (c <= 0x1F45) return true; | |
| 1027 if (c < 0x1F48) return false; if (c <= 0x1F4D) return true; | |
| 1028 if (c < 0x1F50) return false; if (c <= 0x1F57) return true; | |
| 1029 if (c == 0x1F59) return true; | |
| 1030 if (c == 0x1F5B) return true; | |
| 1031 if (c == 0x1F5D) return true; | |
| 1032 if (c < 0x1F5F) return false; if (c <= 0x1F7D) return true; | |
| 1033 if (c < 0x1F80) return false; if (c <= 0x1FB4) return true; | |
| 1034 if (c < 0x1FB6) return false; if (c <= 0x1FBC) return true; | |
| 1035 if (c == 0x1FBE) return true; | |
| 1036 if (c < 0x1FC2) return false; if (c <= 0x1FC4) return true; | |
| 1037 if (c < 0x1FC6) return false; if (c <= 0x1FCC) return true; | |
| 1038 if (c < 0x1FD0) return false; if (c <= 0x1FD3) return true; | |
| 1039 if (c < 0x1FD6) return false; if (c <= 0x1FDB) return true; | |
| 1040 if (c < 0x1FE0) return false; if (c <= 0x1FEC) return true; | |
| 1041 if (c < 0x1FF2) return false; if (c <= 0x1FF4) return true; | |
| 1042 if (c < 0x1FF6) return false; if (c <= 0x1FFC) return true; | |
| 1043 if (c == 0x2126) return true; | |
| 1044 if (c < 0x212A) return false; if (c <= 0x212B) return true; | |
| 1045 if (c == 0x212E) return true; | |
| 1046 if (c < 0x2180) return false; if (c <= 0x2182) return true; | |
| 1047 if (c == 0x3007) return true; // ideographic | |
| 1048 if (c < 0x3021) return false; if (c <= 0x3029) return true; // ideo | |
| 1049 if (c < 0x3041) return false; if (c <= 0x3094) return true; | |
| 1050 if (c < 0x30A1) return false; if (c <= 0x30FA) return true; | |
| 1051 if (c < 0x3105) return false; if (c <= 0x312C) return true; | |
| 1052 if (c < 0x4E00) return false; if (c <= 0x9FA5) return true; // ideo | |
| 1053 if (c < 0xAC00) return false; if (c <= 0xD7A3) return true; | |
| 1054 | |
| 1055 return false; | |
| 1056 | |
| 1057 } | |
| 1058 | |
| 1059 /** | |
| 1060 * This is a utility function for determining whether a specified character | |
| 1061 * is a combining character according to production 87 | |
| 1062 * of the XML 1.0 specification. | |
| 1063 * | |
| 1064 * @param c <code>char</code> to check. | |
| 1065 * @return <code>boolean</code> true if it's a combining character, | |
| 1066 * false otherwise. | |
| 1067 */ | |
| 1068 public static boolean isXMLCombiningChar(char c) { | |
| 1069 // CombiningChar | |
| 1070 if (c < 0x0300) return false; if (c <= 0x0345) return true; | |
| 1071 if (c < 0x0360) return false; if (c <= 0x0361) return true; | |
| 1072 if (c < 0x0483) return false; if (c <= 0x0486) return true; | |
| 1073 if (c < 0x0591) return false; if (c <= 0x05A1) return true; | |
| 1074 | |
| 1075 if (c < 0x05A3) return false; if (c <= 0x05B9) return true; | |
| 1076 if (c < 0x05BB) return false; if (c <= 0x05BD) return true; | |
| 1077 if (c == 0x05BF) return true; | |
| 1078 if (c < 0x05C1) return false; if (c <= 0x05C2) return true; | |
| 1079 | |
| 1080 if (c == 0x05C4) return true; | |
| 1081 if (c < 0x064B) return false; if (c <= 0x0652) return true; | |
| 1082 if (c == 0x0670) return true; | |
| 1083 if (c < 0x06D6) return false; if (c <= 0x06DC) return true; | |
| 1084 | |
| 1085 if (c < 0x06DD) return false; if (c <= 0x06DF) return true; | |
| 1086 if (c < 0x06E0) return false; if (c <= 0x06E4) return true; | |
| 1087 if (c < 0x06E7) return false; if (c <= 0x06E8) return true; | |
| 1088 | |
| 1089 if (c < 0x06EA) return false; if (c <= 0x06ED) return true; | |
| 1090 if (c < 0x0901) return false; if (c <= 0x0903) return true; | |
| 1091 if (c == 0x093C) return true; | |
| 1092 if (c < 0x093E) return false; if (c <= 0x094C) return true; | |
| 1093 | |
| 1094 if (c == 0x094D) return true; | |
| 1095 if (c < 0x0951) return false; if (c <= 0x0954) return true; | |
| 1096 if (c < 0x0962) return false; if (c <= 0x0963) return true; | |
| 1097 if (c < 0x0981) return false; if (c <= 0x0983) return true; | |
| 1098 | |
| 1099 if (c == 0x09BC) return true; | |
| 1100 if (c == 0x09BE) return true; | |
| 1101 if (c == 0x09BF) return true; | |
| 1102 if (c < 0x09C0) return false; if (c <= 0x09C4) return true; | |
| 1103 if (c < 0x09C7) return false; if (c <= 0x09C8) return true; | |
| 1104 | |
| 1105 if (c < 0x09CB) return false; if (c <= 0x09CD) return true; | |
| 1106 if (c == 0x09D7) return true; | |
| 1107 if (c < 0x09E2) return false; if (c <= 0x09E3) return true; | |
| 1108 if (c == 0x0A02) return true; | |
| 1109 if (c == 0x0A3C) return true; | |
| 1110 | |
| 1111 if (c == 0x0A3E) return true; | |
| 1112 if (c == 0x0A3F) return true; | |
| 1113 if (c < 0x0A40) return false; if (c <= 0x0A42) return true; | |
| 1114 if (c < 0x0A47) return false; if (c <= 0x0A48) return true; | |
| 1115 | |
| 1116 if (c < 0x0A4B) return false; if (c <= 0x0A4D) return true; | |
| 1117 if (c < 0x0A70) return false; if (c <= 0x0A71) return true; | |
| 1118 if (c < 0x0A81) return false; if (c <= 0x0A83) return true; | |
| 1119 if (c == 0x0ABC) return true; | |
| 1120 | |
| 1121 if (c < 0x0ABE) return false; if (c <= 0x0AC5) return true; | |
| 1122 if (c < 0x0AC7) return false; if (c <= 0x0AC9) return true; | |
| 1123 if (c < 0x0ACB) return false; if (c <= 0x0ACD) return true; | |
| 1124 | |
| 1125 if (c < 0x0B01) return false; if (c <= 0x0B03) return true; | |
| 1126 if (c == 0x0B3C) return true; | |
| 1127 if (c < 0x0B3E) return false; if (c <= 0x0B43) return true; | |
| 1128 if (c < 0x0B47) return false; if (c <= 0x0B48) return true; | |
| 1129 | |
| 1130 if (c < 0x0B4B) return false; if (c <= 0x0B4D) return true; | |
| 1131 if (c < 0x0B56) return false; if (c <= 0x0B57) return true; | |
| 1132 if (c < 0x0B82) return false; if (c <= 0x0B83) return true; | |
| 1133 | |
| 1134 if (c < 0x0BBE) return false; if (c <= 0x0BC2) return true; | |
| 1135 if (c < 0x0BC6) return false; if (c <= 0x0BC8) return true; | |
| 1136 if (c < 0x0BCA) return false; if (c <= 0x0BCD) return true; | |
| 1137 if (c == 0x0BD7) return true; | |
| 1138 | |
| 1139 if (c < 0x0C01) return false; if (c <= 0x0C03) return true; | |
| 1140 if (c < 0x0C3E) return false; if (c <= 0x0C44) return true; | |
| 1141 if (c < 0x0C46) return false; if (c <= 0x0C48) return true; | |
| 1142 | |
| 1143 if (c < 0x0C4A) return false; if (c <= 0x0C4D) return true; | |
| 1144 if (c < 0x0C55) return false; if (c <= 0x0C56) return true; | |
| 1145 if (c < 0x0C82) return false; if (c <= 0x0C83) return true; | |
| 1146 | |
| 1147 if (c < 0x0CBE) return false; if (c <= 0x0CC4) return true; | |
| 1148 if (c < 0x0CC6) return false; if (c <= 0x0CC8) return true; | |
| 1149 if (c < 0x0CCA) return false; if (c <= 0x0CCD) return true; | |
| 1150 | |
| 1151 if (c < 0x0CD5) return false; if (c <= 0x0CD6) return true; | |
| 1152 if (c < 0x0D02) return false; if (c <= 0x0D03) return true; | |
| 1153 if (c < 0x0D3E) return false; if (c <= 0x0D43) return true; | |
| 1154 | |
| 1155 if (c < 0x0D46) return false; if (c <= 0x0D48) return true; | |
| 1156 if (c < 0x0D4A) return false; if (c <= 0x0D4D) return true; | |
| 1157 if (c == 0x0D57) return true; | |
| 1158 if (c == 0x0E31) return true; | |
| 1159 | |
| 1160 if (c < 0x0E34) return false; if (c <= 0x0E3A) return true; | |
| 1161 if (c < 0x0E47) return false; if (c <= 0x0E4E) return true; | |
| 1162 if (c == 0x0EB1) return true; | |
| 1163 if (c < 0x0EB4) return false; if (c <= 0x0EB9) return true; | |
| 1164 | |
| 1165 if (c < 0x0EBB) return false; if (c <= 0x0EBC) return true; | |
| 1166 if (c < 0x0EC8) return false; if (c <= 0x0ECD) return true; | |
| 1167 if (c < 0x0F18) return false; if (c <= 0x0F19) return true; | |
| 1168 if (c == 0x0F35) return true; | |
| 1169 | |
| 1170 if (c == 0x0F37) return true; | |
| 1171 if (c == 0x0F39) return true; | |
| 1172 if (c == 0x0F3E) return true; | |
| 1173 if (c == 0x0F3F) return true; | |
| 1174 if (c < 0x0F71) return false; if (c <= 0x0F84) return true; | |
| 1175 | |
| 1176 if (c < 0x0F86) return false; if (c <= 0x0F8B) return true; | |
| 1177 if (c < 0x0F90) return false; if (c <= 0x0F95) return true; | |
| 1178 if (c == 0x0F97) return true; | |
| 1179 if (c < 0x0F99) return false; if (c <= 0x0FAD) return true; | |
| 1180 | |
| 1181 if (c < 0x0FB1) return false; if (c <= 0x0FB7) return true; | |
| 1182 if (c == 0x0FB9) return true; | |
| 1183 if (c < 0x20D0) return false; if (c <= 0x20DC) return true; | |
| 1184 if (c == 0x20E1) return true; | |
| 1185 | |
| 1186 if (c < 0x302A) return false; if (c <= 0x302F) return true; | |
| 1187 if (c == 0x3099) return true; | |
| 1188 if (c == 0x309A) return true; | |
| 1189 | |
| 1190 return false; | |
| 1191 | |
| 1192 } | |
| 1193 | |
| 1194 /** | |
| 1195 * This is a utility function for determining whether a specified | |
| 1196 * character is an extender according to production 88 of the XML 1.0 | |
| 1197 * specification. | |
| 1198 * | |
| 1199 * @param c <code>char</code> to check. | |
| 1200 * @return <code>String</code> true if it's an extender, false otherwise. | |
| 1201 */ | |
| 1202 public static boolean isXMLExtender(char c) { | |
| 1203 | |
| 1204 if (c < 0x00B6) return false; // quick short circuit | |
| 1205 | |
| 1206 // Extenders | |
| 1207 if (c == 0x00B7) return true; | |
| 1208 if (c == 0x02D0) return true; | |
| 1209 if (c == 0x02D1) return true; | |
| 1210 if (c == 0x0387) return true; | |
| 1211 if (c == 0x0640) return true; | |
| 1212 if (c == 0x0E46) return true; | |
| 1213 if (c == 0x0EC6) return true; | |
| 1214 if (c == 0x3005) return true; | |
| 1215 | |
| 1216 if (c < 0x3031) return false; if (c <= 0x3035) return true; | |
| 1217 if (c < 0x309D) return false; if (c <= 0x309E) return true; | |
| 1218 if (c < 0x30FC) return false; if (c <= 0x30FE) return true; | |
| 1219 | |
| 1220 return false; | |
| 1221 | |
| 1222 } | |
| 1223 | |
| 1224 /** | |
| 1225 * This is a utility function for determining whether a specified | |
| 1226 * Unicode character | |
| 1227 * is a digit according to production 88 of the XML 1.0 specification. | |
| 1228 * | |
| 1229 * @param c <code>char</code> to check for XML digit compliance | |
| 1230 * @return <code>boolean</code> true if it's a digit, false otherwise | |
| 1231 */ | |
| 1232 public static boolean isXMLDigit(char c) { | |
| 1233 | |
| 1234 if (c < 0x0030) return false; if (c <= 0x0039) return true; | |
| 1235 if (c < 0x0660) return false; if (c <= 0x0669) return true; | |
| 1236 if (c < 0x06F0) return false; if (c <= 0x06F9) return true; | |
| 1237 if (c < 0x0966) return false; if (c <= 0x096F) return true; | |
| 1238 | |
| 1239 if (c < 0x09E6) return false; if (c <= 0x09EF) return true; | |
| 1240 if (c < 0x0A66) return false; if (c <= 0x0A6F) return true; | |
| 1241 if (c < 0x0AE6) return false; if (c <= 0x0AEF) return true; | |
| 1242 | |
| 1243 if (c < 0x0B66) return false; if (c <= 0x0B6F) return true; | |
| 1244 if (c < 0x0BE7) return false; if (c <= 0x0BEF) return true; | |
| 1245 if (c < 0x0C66) return false; if (c <= 0x0C6F) return true; | |
| 1246 | |
| 1247 if (c < 0x0CE6) return false; if (c <= 0x0CEF) return true; | |
| 1248 if (c < 0x0D66) return false; if (c <= 0x0D6F) return true; | |
| 1249 if (c < 0x0E50) return false; if (c <= 0x0E59) return true; | |
| 1250 | |
| 1251 if (c < 0x0ED0) return false; if (c <= 0x0ED9) return true; | |
| 1252 if (c < 0x0F20) return false; if (c <= 0x0F29) return true; | |
| 1253 | |
| 1254 return false; | |
| 1255 } | |
| 1256 | |
| 1257 /** | |
| 1258 * This is a utility function for determining whether a specified | |
| 1259 * Unicode character is a whitespace character according to production 3 | |
| 1260 * of the XML 1.0 specification. | |
| 1261 * | |
| 1262 * @param c <code>char</code> to check for XML whitespace compliance | |
| 1263 * @return <code>boolean</code> true if it's a whitespace, false otherwise | |
| 1264 */ | |
| 1265 public static boolean isXMLWhitespace(char c) { | |
| 1266 if (c==' ' || c=='\n' || c=='\t' || c=='\r' ){ | |
| 1267 return true; | |
| 1268 } | |
| 1269 return false; | |
| 1270 } | |
| 1271 } |
