Mercurial > repos > pfrommolt > ngsrich
diff NGSrich_0.5.5/src/org/jdom/Verifier.java @ 0:89ad0a9cca52 default tip
Uploaded
author | pfrommolt |
---|---|
date | Mon, 21 Nov 2011 08:12:19 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/NGSrich_0.5.5/src/org/jdom/Verifier.java Mon Nov 21 08:12:19 2011 -0500 @@ -0,0 +1,1271 @@ +/*-- + + $Id: Verifier.java,v 1.57 2009/07/23 05:54:23 jhunter Exp $ + + Copyright (C) 2000-2007 Jason Hunter & Brett McLaughlin. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the disclaimer that follows + these conditions in the documentation and/or other materials + provided with the distribution. + + 3. The name "JDOM" must not be used to endorse or promote products + derived from this software without prior written permission. For + written permission, please contact <request_AT_jdom_DOT_org>. + + 4. Products derived from this software may not be called "JDOM", nor + may "JDOM" appear in their name, without prior written permission + from the JDOM Project Management <request_AT_jdom_DOT_org>. + + In addition, we request (but do not require) that you include in the + end-user documentation provided with the redistribution and/or in the + software itself an acknowledgement equivalent to the following: + "This product includes software developed by the + JDOM Project (http://www.jdom.org/)." + Alternatively, the acknowledgment may be graphical using the logos + available at http://www.jdom.org/images/logos. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. + + This software consists of voluntary contributions made by many + individuals on behalf of the JDOM Project and was originally + created by Jason Hunter <jhunter_AT_jdom_DOT_org> and + Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information + on the JDOM Project, please see <http://www.jdom.org/>. + + */ + +package org.jdom; + +import java.util.*; + +/** + * A utility class to handle well-formedness checks on names, data, and other + * verification tasks for JDOM. The class is final and may not be subclassed. + * + * @version $Revision: 1.57 $, $Date: 2009/07/23 05:54:23 $ + * @author Brett McLaughlin + * @author Elliotte Rusty Harold + * @author Jason Hunter + * @author Bradley S. Huffman + */ +final public class Verifier { + + private static final String CVS_ID = + "@(#) $RCSfile: Verifier.java,v $ $Revision: 1.57 $ $Date: 2009/07/23 05:54:23 $ $Name: jdom_1_1_1 $"; + + /** + * Ensure instantation cannot occur. + */ + private Verifier() { } + + /** + * This will check the supplied name to see if it is legal for use as + * a JDOM <code>{@link Element}</code> name. + * + * @param name <code>String</code> name to check. + * @return <code>String</code> reason name is illegal, or + * <code>null</code> if name is OK. + */ + public static String checkElementName(String name) { + // Check basic XML name rules first + String reason; + if ((reason = checkXMLName(name)) != null) { + return reason; + } + + // No colons allowed, since elements handle this internally + if (name.indexOf(":") != -1) { + return "Element names cannot contain colons"; + } + + // If we got here, everything is OK + return null; + } + + /** + * This will check the supplied name to see if it is legal for use as + * a JDOM <code>{@link Attribute}</code> name. + * + * @param name <code>String</code> name to check. + * @return <code>String</code> reason name is illegal, or + * <code>null</code> if name is OK. + */ + public static String checkAttributeName(String name) { + // Check basic XML name rules first + String reason; + if ((reason = checkXMLName(name)) != null) { + return reason; + } + + // No colons are allowed, since attributes handle this internally + if (name.indexOf(":") != -1) { + return "Attribute names cannot contain colons"; + } + + // Attribute names may not be xmlns since we do this internally too + if (name.equals("xmlns")) { + return "An Attribute name may not be \"xmlns\"; " + + "use the Namespace class to manage namespaces"; + } + + // If we got here, everything is OK + return null; + } + + /** + * This will check the supplied string to see if it only contains + * characters allowed by the XML 1.0 specification. The C0 controls + * (e.g. null, vertical tab, formfeed, etc.) are specifically excluded + * except for carriage return, linefeed, and the horizontal tab. + * Surrogates are also excluded. + * <p> + * This method is useful for checking element content and attribute + * values. Note that characters + * like " and < are allowed in attribute values and element content. + * They will simply be escaped as " or < + * when the value is serialized. + * </p> + * + * @param text <code>String</code> value to check. + * @return <code>String</code> reason name is illegal, or + * <code>null</code> if name is OK. + */ + public static String checkCharacterData(String text) { + if (text == null) { + return "A null is not a legal XML value"; + } + + // Do check + for (int i = 0, len = text.length(); i<len; i++) { + + int ch = text.charAt(i); + + // Check if high part of a surrogate pair + if (isHighSurrogate((char) ch)) { + // Check if next char is the low-surrogate + i++; + if (i < len) { + char low = text.charAt(i); + if (!isLowSurrogate(low)) { + return "Illegal Surrogate Pair"; + } + // It's a good pair, calculate the true value of + // the character to then fall thru to isXMLCharacter + ch = decodeSurrogatePair((char) ch, low); + } + else { + return "Surrogate Pair Truncated"; + } + } + + if (!isXMLCharacter(ch)) { + // Likely this character can't be easily displayed + // because it's a control so we use it'd hexadecimal + // representation in the reason. + return ("0x" + Integer.toHexString(ch) + + " is not a legal XML character"); + } + } + + // If we got here, everything is OK + return null; + } + + /** + * This will check the supplied data to see if it is legal for use as + * JDOM <code>{@link CDATA}</code>. + * + * @param data <code>String</code> data to check. + * @return <code>String</code> reason data is illegal, or + * <code>null</code> is name is OK. + */ + public static String checkCDATASection(String data) { + String reason = null; + if ((reason = checkCharacterData(data)) != null) { + return reason; + } + + if (data.indexOf("]]>") != -1) { + return "CDATA cannot internally contain a CDATA ending " + + "delimiter (]]>)"; + } + + // If we got here, everything is OK + return null; + } + + /** + * This will check the supplied name to see if it is legal for use as + * a JDOM <code>{@link Namespace}</code> prefix. + * + * @param prefix <code>String</code> prefix to check. + * @return <code>String</code> reason name is illegal, or + * <code>null</code> if name is OK. + */ + public static String checkNamespacePrefix(String prefix) { + // Manually do rules, since URIs can be null or empty + if ((prefix == null) || (prefix.equals(""))) { + return null; + } + + // Cannot start with a number + char first = prefix.charAt(0); + if (isXMLDigit(first)) { + return "Namespace prefixes cannot begin with a number"; + } + // Cannot start with a $ + if (first == '$') { + return "Namespace prefixes cannot begin with a dollar sign ($)"; + } + // Cannot start with a - + if (first == '-') { + return "Namespace prefixes cannot begin with a hyphen (-)"; + } + // Cannot start with a . + if (first == '.') { + return "Namespace prefixes cannot begin with a period (.)"; + } + // Cannot start with "xml" in any character case + if (prefix.toLowerCase().startsWith("xml")) { + return "Namespace prefixes cannot begin with " + + "\"xml\" in any combination of case"; + } + + // Ensure legal content + for (int i=0, len = prefix.length(); i<len; i++) { + char c = prefix.charAt(i); + if (!isXMLNameCharacter(c)) { + return "Namespace prefixes cannot contain the character \"" + + c + "\""; + } + } + + // No colons allowed + if (prefix.indexOf(":") != -1) { + return "Namespace prefixes cannot contain colons"; + } + + // If we got here, everything is OK + return null; + } + + /** + * This will check the supplied name to see if it is legal for use as + * a JDOM <code>{@link Namespace}</code> URI. + * + * @param uri <code>String</code> URI to check. + * @return <code>String</code> reason name is illegal, or + * <code>null</code> if name is OK. + */ + public static String checkNamespaceURI(String uri) { + // Manually do rules, since URIs can be null or empty + if ((uri == null) || (uri.equals(""))) { + return null; + } + + // Cannot start with a number + char first = uri.charAt(0); + if (Character.isDigit(first)) { + return "Namespace URIs cannot begin with a number"; + } + // Cannot start with a $ + if (first == '$') { + return "Namespace URIs cannot begin with a dollar sign ($)"; + } + // Cannot start with a - + if (first == '-') { + return "Namespace URIs cannot begin with a hyphen (-)"; + } + + // If we got here, everything is OK + return null; + } + + /** + * Check if two namespaces collide. + * + * @param namespace <code>Namespace</code> to check. + * @param other <code>Namespace</code> to check against. + * @return <code>String</code> reason for collision, or + * <code>null</code> if no collision. + */ + public static String checkNamespaceCollision(Namespace namespace, + Namespace other) { + String p1,p2,u1,u2,reason; + + reason = null; + p1 = namespace.getPrefix(); + u1 = namespace.getURI(); + p2 = other.getPrefix(); + u2 = other.getURI(); + if (p1.equals(p2) && !u1.equals(u2)) { + reason = "The namespace prefix \"" + p1 + "\" collides"; + } + return reason; + } + + /** + * Check if <code>{@link Attribute}</code>'s namespace collides with a + * <code>{@link Element}</code>'s namespace. + * + * @param attribute <code>Attribute</code> to check. + * @param element <code>Element</code> to check against. + * @return <code>String</code> reason for collision, or + * <code>null</code> if no collision. + */ + public static String checkNamespaceCollision(Attribute attribute, + Element element) { + Namespace namespace = attribute.getNamespace(); + String prefix = namespace.getPrefix(); + if ("".equals(prefix)) { + return null; + } + + return checkNamespaceCollision(namespace, element); + } + + /** + * Check if a <code>{@link Namespace}</code> collides with a + * <code>{@link Element}</code>'s namespace. + * + * @param namespace <code>Namespace</code> to check. + * @param element <code>Element</code> to check against. + * @return <code>String</code> reason for collision, or + * <code>null</code> if no collision. + */ + public static String checkNamespaceCollision(Namespace namespace, + Element element) { + String reason = checkNamespaceCollision(namespace, + element.getNamespace()); + if (reason != null) { + return reason + " with the element namespace prefix"; + } + + reason = checkNamespaceCollision(namespace, + element.getAdditionalNamespaces()); + if (reason != null) { + return reason; + } + + reason = checkNamespaceCollision(namespace, element.getAttributes()); + if (reason != null) { + return reason; + } + + return null; + } + + /** + * Check if a <code>{@link Namespace}</code> collides with a + * <code>{@link Attribute}</code>'s namespace. + * + * @param namespace <code>Namespace</code> to check. + * @param attribute <code>Attribute</code> to check against. + * @return <code>String</code> reason for collision, or + * <code>null</code> if no collision. + */ + public static String checkNamespaceCollision(Namespace namespace, + Attribute attribute) { + String reason = null; + if (!attribute.getNamespace().equals(Namespace.NO_NAMESPACE)) { + reason = checkNamespaceCollision(namespace, + attribute.getNamespace()); + if (reason != null) { + reason += " with an attribute namespace prefix on the element"; + } + } + return reason; + } + + /** + * Check if a <code>{@link Namespace}</code> collides with any namespace + * from a list of objects. + * + * @param namespace <code>Namespace</code> to check. + * @param list <code>List</code> to check against. + * @return <code>String</code> reason for collision, or + * <code>null</code> if no collision. + */ + public static String checkNamespaceCollision(Namespace namespace, + List list) { + if (list == null) { + return null; + } + + String reason = null; + Iterator i = list.iterator(); + while ((reason == null) && i.hasNext()) { + Object obj = i.next(); + if (obj instanceof Attribute) { + reason = checkNamespaceCollision(namespace, (Attribute) obj); + } + else if (obj instanceof Element) { + reason = checkNamespaceCollision(namespace, (Element) obj); + } + else if (obj instanceof Namespace) { + reason = checkNamespaceCollision(namespace, (Namespace) obj); + if (reason != null) { + reason += " with an additional namespace declared" + + " by the element"; + } + } + } + return reason; + } + + /** + * This will check the supplied data to see if it is legal for use as + * a JDOM <code>{@link ProcessingInstruction}</code> target. + * + * @param target <code>String</code> target to check. + * @return <code>String</code> reason target is illegal, or + * <code>null</code> if target is OK. + */ + public static String checkProcessingInstructionTarget(String target) { + // Check basic XML name rules first + String reason; + if ((reason = checkXMLName(target)) != null) { + return reason; + } + + // No colons allowed, per Namespace Specification Section 6 + if (target.indexOf(":") != -1) { + return "Processing instruction targets cannot contain colons"; + } + + // Cannot begin with 'xml' in any case + if (target.equalsIgnoreCase("xml")) { + return "Processing instructions cannot have a target of " + + "\"xml\" in any combination of case. (Note that the " + + "\"<?xml ... ?>\" declaration at the beginning of a " + + "document is not a processing instruction and should not " + + "be added as one; it is written automatically during " + + "output, e.g. by XMLOutputter.)"; + } + + // If we got here, everything is OK + return null; + } + + /** + * This will check the supplied data to see if it is legal for use as + * <code>{@link ProcessingInstruction}</code> data. Besides checking that + * all the characters are allowed in XML, this also checks + * that the data does not contain the PI end-string "?>". + * + * @param data <code>String</code> data to check. + * @return <code>String</code> reason data is illegal, or + * <code>null</code> if data is OK. + */ + public static String checkProcessingInstructionData(String data) { + // Check basic XML name rules first + String reason = checkCharacterData(data); + + if (reason == null) { + if (data.indexOf("?>") >= 0) { + return "Processing instructions cannot contain " + + "the string \"?>\""; + } + } + + return reason; + } + + /** + * This will check the supplied data to see if it is legal for use as + * JDOM <code>{@link Comment}</code> data. + * + * @param data <code>String</code> data to check. + * @return <code>String</code> reason data is illegal, or + * <code>null</code> if data is OK. + */ + public static String checkCommentData(String data) { + String reason = null; + if ((reason = checkCharacterData(data)) != null) { + return reason; + } + + if (data.indexOf("--") != -1) { + return "Comments cannot contain double hyphens (--)"; + } + if (data.endsWith("-")) { + return "Comment data cannot end with a hyphen."; + } + + // If we got here, everything is OK + return null; + } + /** + * This is a utility function to decode a non-BMP + * UTF-16 surrogate pair. + * @param high high 16 bits + * @param low low 16 bits + * @return decoded character + */ + public static int decodeSurrogatePair(char high, char low) { + return 0x10000 + (high - 0xD800) * 0x400 + (low - 0xDC00); + } + + // [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | + // [-'()+,./:=?;*#@$_%] + public static boolean isXMLPublicIDCharacter(char c) { + + if (c >= 'a' && c <= 'z') return true; + if (c >= '?' && c <= 'Z') return true; + if (c >= '\'' && c <= ';') return true; + + if (c == ' ') return true; + if (c == '!') return true; + if (c == '=') return true; + if (c == '#') return true; + if (c == '$') return true; + if (c == '_') return true; + if (c == '%') return true; + if (c == '\n') return true; + if (c == '\r') return true; + if (c == '\t') return true; + + return false; + } + + /** + * This will ensure that the data for a public identifier + * is legal. + * + * @param publicID <code>String</code> public ID to check. + * @return <code>String</code> reason public ID is illegal, or + * <code>null</code> if public ID is OK. + */ + public static String checkPublicID(String publicID) { + String reason = null; + + if (publicID == null) return null; + // This indicates there is no public ID + + for (int i = 0; i < publicID.length(); i++) { + char c = publicID.charAt(i); + if (!isXMLPublicIDCharacter(c)) { + reason = c + " is not a legal character in public IDs"; + break; + } + } + + return reason; + } + + + /** + * This will ensure that the data for a system literal + * is legal. + * + * @param systemLiteral <code>String</code> system literal to check. + * @return <code>String</code> reason system literal is illegal, or + * <code>null</code> if system literal is OK. + */ + public static String checkSystemLiteral(String systemLiteral) { + String reason = null; + + if (systemLiteral == null) return null; + // This indicates there is no system ID + + if (systemLiteral.indexOf('\'') != -1 + && systemLiteral.indexOf('"') != -1) { + reason = + "System literals cannot simultaneously contain both single and double quotes."; + } + else { + reason = checkCharacterData(systemLiteral); + } + + return reason; + } + + /** + * This is a utility function for sharing the base process of checking + * any XML name. + * + * @param name <code>String</code> to check for XML name compliance. + * @return <code>String</code> reason the name is illegal, or + * <code>null</code> if OK. + */ + public static String checkXMLName(String name) { + // Cannot be empty or null + if ((name == null) || (name.length() == 0) + || (name.trim().equals(""))) { + return "XML names cannot be null or empty"; + } + + + // Cannot start with a number + char first = name.charAt(0); + if (!isXMLNameStartCharacter(first)) { + return "XML names cannot begin with the character \"" + + first + "\""; + } + // Ensure legal content for non-first chars + for (int i=1, len = name.length(); i<len; i++) { + char c = name.charAt(i); + if (!isXMLNameCharacter(c)) { + return "XML names cannot contain the character \"" + c + "\""; + } + } + + // We got here, so everything is OK + return null; + } + + /** + * <p> + * Checks a string to see if it is a legal RFC 2396 URI. + * Both absolute and relative URIs are supported. + * </p> + * + * @param uri <code>String</code> to check. + * @return <code>String</code> reason the URI is illegal, or + * <code>null</code> if OK. + */ + public static String checkURI(String uri) { + // URIs can be null or empty + if ((uri == null) || (uri.equals(""))) { + return null; + } + + for (int i = 0; i < uri.length(); i++) { + char test = uri.charAt(i); + if (!isURICharacter(test)) { + String msgNumber = "0x" + Integer.toHexString(test); + if (test <= 0x09) msgNumber = "0x0" + Integer.toHexString(test); + return "URIs cannot contain " + msgNumber; + } // end if + if (test == '%') { // must be followed by two hexadecimal digits + try { + char firstDigit = uri.charAt(i+1); + char secondDigit = uri.charAt(i+2); + if (!isHexDigit(firstDigit) || + !isHexDigit(secondDigit)) { + return "Percent signs in URIs must be followed by " + + "exactly two hexadecimal digits."; + } + + } + catch (StringIndexOutOfBoundsException e) { + return "Percent signs in URIs must be followed by " + + "exactly two hexadecimal digits."; + } + } + } // end for + + // If we got here, everything is OK + return null; + } + + /** + * <p> + * This is a utility function for determining whether a specified + * Unicode character is a hexadecimal digit as defined in RFC 2396; + * that is, one of the ASCII characters 0-9, a-f, or A-F. + * </p> + * + * @param c to check for hex digit. + * @return true if it's allowed, false otherwise. + */ + public static boolean isHexDigit(char c) { + + // I suspect most characters passed to this method will be + // correct hexadecimal digits, so I test for the true cases + // first. If this proves to be a performance bottleneck + // a switch statement or lookup table + // might optimize this. + if (c >= '0' && c <= '9') return true; + if (c >= 'A' && c <= 'F') return true; + if (c >= 'a' && c <= 'f') return true; + + return false; + } + + /** + * This is a function for determining whether the + * specified character is the high 16 bits in a + * UTF-16 surrogate pair. + * @param ch character to check + * @return true if the character is a high surrogate, false otherwise + */ + public static boolean isHighSurrogate(char ch) { + return (ch >= 0xD800 && ch <= 0xDBFF); + } + + /** + * This is a function for determining whether the + * specified character is the low 16 bits in a + * UTF-16 surrogate pair. + * @param ch character to check + * @return true if the character is a low surrogate, false otherwise. + */ + public static boolean isLowSurrogate(char ch) { + return (ch >= 0xDC00 && ch <= 0xDFFF); + } + + /** + * <p> + * This is a utility function for determining whether + * a specified Unicode character is legal in URI references + * as determined by RFC 2396. + * </p> + * + * @param c <code>char</code> to check for URI reference compliance. + * @return true if it's allowed, false otherwise. + */ + public static boolean isURICharacter(char c) { + if (c >= 'a' && c <= 'z') return true; + if (c >= 'A' && c <= 'Z') return true; + if (c >= '0' && c <= '9') return true; + if (c == '/') return true; + if (c == '-') return true; + if (c == '.') return true; + if (c == '?') return true; + if (c == ':') return true; + if (c == '@') return true; + if (c == '&') return true; + if (c == '=') return true; + if (c == '+') return true; + if (c == '$') return true; + if (c == ',') return true; + if (c == '%') return true; + + if (c == '_') return true; + if (c == '!') return true; + if (c == '~') return true; + if (c == '*') return true; + if (c == '\'') return true; + if (c == '(') return true; + if (c == ')') return true; + return false; + } + + /** + * This is a utility function for determining whether a specified + * character is a character according to production 2 of the + * XML 1.0 specification. + * + * @param c <code>char</code> to check for XML compliance + * @return <code>boolean</code> true if it's a character, + * false otherwise + */ + public static boolean isXMLCharacter(int c) { + + if (c == '\n') return true; + if (c == '\r') return true; + if (c == '\t') return true; + + if (c < 0x20) return false; if (c <= 0xD7FF) return true; + if (c < 0xE000) return false; if (c <= 0xFFFD) return true; + if (c < 0x10000) return false; if (c <= 0x10FFFF) return true; + + return false; + } + + + /** + * This is a utility function for determining whether a specified + * character is a name character according to production 4 of the + * XML 1.0 specification. + * + * @param c <code>char</code> to check for XML name compliance. + * @return <code>boolean</code> true if it's a name character, + * false otherwise. + */ + public static boolean isXMLNameCharacter(char c) { + + return (isXMLLetter(c) || isXMLDigit(c) || c == '.' || c == '-' + || c == '_' || c == ':' || isXMLCombiningChar(c) + || isXMLExtender(c)); + } + + /** + * This is a utility function for determining whether a specified + * character is a legal name start character according to production 5 + * of the XML 1.0 specification. This production does allow names + * to begin with colons which the Namespaces in XML Recommendation + * disallows. + * + * @param c <code>char</code> to check for XML name start compliance. + * @return <code>boolean</code> true if it's a name start character, + * false otherwise. + */ + public static boolean isXMLNameStartCharacter(char c) { + + return (isXMLLetter(c) || c == '_' || c ==':'); + + } + + /** + * This is a utility function for determining whether a specified + * character is a letter or digit according to productions 84 and 88 + * of the XML 1.0 specification. + * + * @param c <code>char</code> to check. + * @return <code>boolean</code> true if it's letter or digit, + * false otherwise. + */ + public static boolean isXMLLetterOrDigit(char c) { + + return (isXMLLetter(c) || isXMLDigit(c)); + + } + + /** + * This is a utility function for determining whether a specified character + * is a letter according to production 84 of the XML 1.0 specification. + * + * @param c <code>char</code> to check for XML name compliance. + * @return <code>String</code> true if it's a letter, false otherwise. + */ + public static boolean isXMLLetter(char c) { + // Note that order is very important here. The search proceeds + // from lowest to highest values, so that no searching occurs + // above the character's value. BTW, the first line is equivalent to: + // if (c >= 0x0041 && c <= 0x005A) return true; + + if (c < 0x0041) return false; if (c <= 0x005a) return true; + if (c < 0x0061) return false; if (c <= 0x007A) return true; + if (c < 0x00C0) return false; if (c <= 0x00D6) return true; + if (c < 0x00D8) return false; if (c <= 0x00F6) return true; + if (c < 0x00F8) return false; if (c <= 0x00FF) return true; + if (c < 0x0100) return false; if (c <= 0x0131) return true; + if (c < 0x0134) return false; if (c <= 0x013E) return true; + if (c < 0x0141) return false; if (c <= 0x0148) return true; + if (c < 0x014A) return false; if (c <= 0x017E) return true; + if (c < 0x0180) return false; if (c <= 0x01C3) return true; + if (c < 0x01CD) return false; if (c <= 0x01F0) return true; + if (c < 0x01F4) return false; if (c <= 0x01F5) return true; + if (c < 0x01FA) return false; if (c <= 0x0217) return true; + if (c < 0x0250) return false; if (c <= 0x02A8) return true; + if (c < 0x02BB) return false; if (c <= 0x02C1) return true; + if (c == 0x0386) return true; + if (c < 0x0388) return false; if (c <= 0x038A) return true; + if (c == 0x038C) return true; + if (c < 0x038E) return false; if (c <= 0x03A1) return true; + if (c < 0x03A3) return false; if (c <= 0x03CE) return true; + if (c < 0x03D0) return false; if (c <= 0x03D6) return true; + if (c == 0x03DA) return true; + if (c == 0x03DC) return true; + if (c == 0x03DE) return true; + if (c == 0x03E0) return true; + if (c < 0x03E2) return false; if (c <= 0x03F3) return true; + if (c < 0x0401) return false; if (c <= 0x040C) return true; + if (c < 0x040E) return false; if (c <= 0x044F) return true; + if (c < 0x0451) return false; if (c <= 0x045C) return true; + if (c < 0x045E) return false; if (c <= 0x0481) return true; + if (c < 0x0490) return false; if (c <= 0x04C4) return true; + if (c < 0x04C7) return false; if (c <= 0x04C8) return true; + if (c < 0x04CB) return false; if (c <= 0x04CC) return true; + if (c < 0x04D0) return false; if (c <= 0x04EB) return true; + if (c < 0x04EE) return false; if (c <= 0x04F5) return true; + if (c < 0x04F8) return false; if (c <= 0x04F9) return true; + if (c < 0x0531) return false; if (c <= 0x0556) return true; + if (c == 0x0559) return true; + if (c < 0x0561) return false; if (c <= 0x0586) return true; + if (c < 0x05D0) return false; if (c <= 0x05EA) return true; + if (c < 0x05F0) return false; if (c <= 0x05F2) return true; + if (c < 0x0621) return false; if (c <= 0x063A) return true; + if (c < 0x0641) return false; if (c <= 0x064A) return true; + if (c < 0x0671) return false; if (c <= 0x06B7) return true; + if (c < 0x06BA) return false; if (c <= 0x06BE) return true; + if (c < 0x06C0) return false; if (c <= 0x06CE) return true; + if (c < 0x06D0) return false; if (c <= 0x06D3) return true; + if (c == 0x06D5) return true; + if (c < 0x06E5) return false; if (c <= 0x06E6) return true; + if (c < 0x0905) return false; if (c <= 0x0939) return true; + if (c == 0x093D) return true; + if (c < 0x0958) return false; if (c <= 0x0961) return true; + if (c < 0x0985) return false; if (c <= 0x098C) return true; + if (c < 0x098F) return false; if (c <= 0x0990) return true; + if (c < 0x0993) return false; if (c <= 0x09A8) return true; + if (c < 0x09AA) return false; if (c <= 0x09B0) return true; + if (c == 0x09B2) return true; + if (c < 0x09B6) return false; if (c <= 0x09B9) return true; + if (c < 0x09DC) return false; if (c <= 0x09DD) return true; + if (c < 0x09DF) return false; if (c <= 0x09E1) return true; + if (c < 0x09F0) return false; if (c <= 0x09F1) return true; + if (c < 0x0A05) return false; if (c <= 0x0A0A) return true; + if (c < 0x0A0F) return false; if (c <= 0x0A10) return true; + if (c < 0x0A13) return false; if (c <= 0x0A28) return true; + if (c < 0x0A2A) return false; if (c <= 0x0A30) return true; + if (c < 0x0A32) return false; if (c <= 0x0A33) return true; + if (c < 0x0A35) return false; if (c <= 0x0A36) return true; + if (c < 0x0A38) return false; if (c <= 0x0A39) return true; + if (c < 0x0A59) return false; if (c <= 0x0A5C) return true; + if (c == 0x0A5E) return true; + if (c < 0x0A72) return false; if (c <= 0x0A74) return true; + if (c < 0x0A85) return false; if (c <= 0x0A8B) return true; + if (c == 0x0A8D) return true; + if (c < 0x0A8F) return false; if (c <= 0x0A91) return true; + if (c < 0x0A93) return false; if (c <= 0x0AA8) return true; + if (c < 0x0AAA) return false; if (c <= 0x0AB0) return true; + if (c < 0x0AB2) return false; if (c <= 0x0AB3) return true; + if (c < 0x0AB5) return false; if (c <= 0x0AB9) return true; + if (c == 0x0ABD) return true; + if (c == 0x0AE0) return true; + if (c < 0x0B05) return false; if (c <= 0x0B0C) return true; + if (c < 0x0B0F) return false; if (c <= 0x0B10) return true; + if (c < 0x0B13) return false; if (c <= 0x0B28) return true; + if (c < 0x0B2A) return false; if (c <= 0x0B30) return true; + if (c < 0x0B32) return false; if (c <= 0x0B33) return true; + if (c < 0x0B36) return false; if (c <= 0x0B39) return true; + if (c == 0x0B3D) return true; + if (c < 0x0B5C) return false; if (c <= 0x0B5D) return true; + if (c < 0x0B5F) return false; if (c <= 0x0B61) return true; + if (c < 0x0B85) return false; if (c <= 0x0B8A) return true; + if (c < 0x0B8E) return false; if (c <= 0x0B90) return true; + if (c < 0x0B92) return false; if (c <= 0x0B95) return true; + if (c < 0x0B99) return false; if (c <= 0x0B9A) return true; + if (c == 0x0B9C) return true; + if (c < 0x0B9E) return false; if (c <= 0x0B9F) return true; + if (c < 0x0BA3) return false; if (c <= 0x0BA4) return true; + if (c < 0x0BA8) return false; if (c <= 0x0BAA) return true; + if (c < 0x0BAE) return false; if (c <= 0x0BB5) return true; + if (c < 0x0BB7) return false; if (c <= 0x0BB9) return true; + if (c < 0x0C05) return false; if (c <= 0x0C0C) return true; + if (c < 0x0C0E) return false; if (c <= 0x0C10) return true; + if (c < 0x0C12) return false; if (c <= 0x0C28) return true; + if (c < 0x0C2A) return false; if (c <= 0x0C33) return true; + if (c < 0x0C35) return false; if (c <= 0x0C39) return true; + if (c < 0x0C60) return false; if (c <= 0x0C61) return true; + if (c < 0x0C85) return false; if (c <= 0x0C8C) return true; + if (c < 0x0C8E) return false; if (c <= 0x0C90) return true; + if (c < 0x0C92) return false; if (c <= 0x0CA8) return true; + if (c < 0x0CAA) return false; if (c <= 0x0CB3) return true; + if (c < 0x0CB5) return false; if (c <= 0x0CB9) return true; + if (c == 0x0CDE) return true; + if (c < 0x0CE0) return false; if (c <= 0x0CE1) return true; + if (c < 0x0D05) return false; if (c <= 0x0D0C) return true; + if (c < 0x0D0E) return false; if (c <= 0x0D10) return true; + if (c < 0x0D12) return false; if (c <= 0x0D28) return true; + if (c < 0x0D2A) return false; if (c <= 0x0D39) return true; + if (c < 0x0D60) return false; if (c <= 0x0D61) return true; + if (c < 0x0E01) return false; if (c <= 0x0E2E) return true; + if (c == 0x0E30) return true; + if (c < 0x0E32) return false; if (c <= 0x0E33) return true; + if (c < 0x0E40) return false; if (c <= 0x0E45) return true; + if (c < 0x0E81) return false; if (c <= 0x0E82) return true; + if (c == 0x0E84) return true; + if (c < 0x0E87) return false; if (c <= 0x0E88) return true; + if (c == 0x0E8A) return true; + if (c == 0x0E8D) return true; + if (c < 0x0E94) return false; if (c <= 0x0E97) return true; + if (c < 0x0E99) return false; if (c <= 0x0E9F) return true; + if (c < 0x0EA1) return false; if (c <= 0x0EA3) return true; + if (c == 0x0EA5) return true; + if (c == 0x0EA7) return true; + if (c < 0x0EAA) return false; if (c <= 0x0EAB) return true; + if (c < 0x0EAD) return false; if (c <= 0x0EAE) return true; + if (c == 0x0EB0) return true; + if (c < 0x0EB2) return false; if (c <= 0x0EB3) return true; + if (c == 0x0EBD) return true; + if (c < 0x0EC0) return false; if (c <= 0x0EC4) return true; + if (c < 0x0F40) return false; if (c <= 0x0F47) return true; + if (c < 0x0F49) return false; if (c <= 0x0F69) return true; + if (c < 0x10A0) return false; if (c <= 0x10C5) return true; + if (c < 0x10D0) return false; if (c <= 0x10F6) return true; + if (c == 0x1100) return true; + if (c < 0x1102) return false; if (c <= 0x1103) return true; + if (c < 0x1105) return false; if (c <= 0x1107) return true; + if (c == 0x1109) return true; + if (c < 0x110B) return false; if (c <= 0x110C) return true; + if (c < 0x110E) return false; if (c <= 0x1112) return true; + if (c == 0x113C) return true; + if (c == 0x113E) return true; + if (c == 0x1140) return true; + if (c == 0x114C) return true; + if (c == 0x114E) return true; + if (c == 0x1150) return true; + if (c < 0x1154) return false; if (c <= 0x1155) return true; + if (c == 0x1159) return true; + if (c < 0x115F) return false; if (c <= 0x1161) return true; + if (c == 0x1163) return true; + if (c == 0x1165) return true; + if (c == 0x1167) return true; + if (c == 0x1169) return true; + if (c < 0x116D) return false; if (c <= 0x116E) return true; + if (c < 0x1172) return false; if (c <= 0x1173) return true; + if (c == 0x1175) return true; + if (c == 0x119E) return true; + if (c == 0x11A8) return true; + if (c == 0x11AB) return true; + if (c < 0x11AE) return false; if (c <= 0x11AF) return true; + if (c < 0x11B7) return false; if (c <= 0x11B8) return true; + if (c == 0x11BA) return true; + if (c < 0x11BC) return false; if (c <= 0x11C2) return true; + if (c == 0x11EB) return true; + if (c == 0x11F0) return true; + if (c == 0x11F9) return true; + if (c < 0x1E00) return false; if (c <= 0x1E9B) return true; + if (c < 0x1EA0) return false; if (c <= 0x1EF9) return true; + if (c < 0x1F00) return false; if (c <= 0x1F15) return true; + if (c < 0x1F18) return false; if (c <= 0x1F1D) return true; + if (c < 0x1F20) return false; if (c <= 0x1F45) return true; + if (c < 0x1F48) return false; if (c <= 0x1F4D) return true; + if (c < 0x1F50) return false; if (c <= 0x1F57) return true; + if (c == 0x1F59) return true; + if (c == 0x1F5B) return true; + if (c == 0x1F5D) return true; + if (c < 0x1F5F) return false; if (c <= 0x1F7D) return true; + if (c < 0x1F80) return false; if (c <= 0x1FB4) return true; + if (c < 0x1FB6) return false; if (c <= 0x1FBC) return true; + if (c == 0x1FBE) return true; + if (c < 0x1FC2) return false; if (c <= 0x1FC4) return true; + if (c < 0x1FC6) return false; if (c <= 0x1FCC) return true; + if (c < 0x1FD0) return false; if (c <= 0x1FD3) return true; + if (c < 0x1FD6) return false; if (c <= 0x1FDB) return true; + if (c < 0x1FE0) return false; if (c <= 0x1FEC) return true; + if (c < 0x1FF2) return false; if (c <= 0x1FF4) return true; + if (c < 0x1FF6) return false; if (c <= 0x1FFC) return true; + if (c == 0x2126) return true; + if (c < 0x212A) return false; if (c <= 0x212B) return true; + if (c == 0x212E) return true; + if (c < 0x2180) return false; if (c <= 0x2182) return true; + if (c == 0x3007) return true; // ideographic + if (c < 0x3021) return false; if (c <= 0x3029) return true; // ideo + if (c < 0x3041) return false; if (c <= 0x3094) return true; + if (c < 0x30A1) return false; if (c <= 0x30FA) return true; + if (c < 0x3105) return false; if (c <= 0x312C) return true; + if (c < 0x4E00) return false; if (c <= 0x9FA5) return true; // ideo + if (c < 0xAC00) return false; if (c <= 0xD7A3) return true; + + return false; + + } + + /** + * This is a utility function for determining whether a specified character + * is a combining character according to production 87 + * of the XML 1.0 specification. + * + * @param c <code>char</code> to check. + * @return <code>boolean</code> true if it's a combining character, + * false otherwise. + */ + public static boolean isXMLCombiningChar(char c) { + // CombiningChar + if (c < 0x0300) return false; if (c <= 0x0345) return true; + if (c < 0x0360) return false; if (c <= 0x0361) return true; + if (c < 0x0483) return false; if (c <= 0x0486) return true; + if (c < 0x0591) return false; if (c <= 0x05A1) return true; + + if (c < 0x05A3) return false; if (c <= 0x05B9) return true; + if (c < 0x05BB) return false; if (c <= 0x05BD) return true; + if (c == 0x05BF) return true; + if (c < 0x05C1) return false; if (c <= 0x05C2) return true; + + if (c == 0x05C4) return true; + if (c < 0x064B) return false; if (c <= 0x0652) return true; + if (c == 0x0670) return true; + if (c < 0x06D6) return false; if (c <= 0x06DC) return true; + + if (c < 0x06DD) return false; if (c <= 0x06DF) return true; + if (c < 0x06E0) return false; if (c <= 0x06E4) return true; + if (c < 0x06E7) return false; if (c <= 0x06E8) return true; + + if (c < 0x06EA) return false; if (c <= 0x06ED) return true; + if (c < 0x0901) return false; if (c <= 0x0903) return true; + if (c == 0x093C) return true; + if (c < 0x093E) return false; if (c <= 0x094C) return true; + + if (c == 0x094D) return true; + if (c < 0x0951) return false; if (c <= 0x0954) return true; + if (c < 0x0962) return false; if (c <= 0x0963) return true; + if (c < 0x0981) return false; if (c <= 0x0983) return true; + + if (c == 0x09BC) return true; + if (c == 0x09BE) return true; + if (c == 0x09BF) return true; + if (c < 0x09C0) return false; if (c <= 0x09C4) return true; + if (c < 0x09C7) return false; if (c <= 0x09C8) return true; + + if (c < 0x09CB) return false; if (c <= 0x09CD) return true; + if (c == 0x09D7) return true; + if (c < 0x09E2) return false; if (c <= 0x09E3) return true; + if (c == 0x0A02) return true; + if (c == 0x0A3C) return true; + + if (c == 0x0A3E) return true; + if (c == 0x0A3F) return true; + if (c < 0x0A40) return false; if (c <= 0x0A42) return true; + if (c < 0x0A47) return false; if (c <= 0x0A48) return true; + + if (c < 0x0A4B) return false; if (c <= 0x0A4D) return true; + if (c < 0x0A70) return false; if (c <= 0x0A71) return true; + if (c < 0x0A81) return false; if (c <= 0x0A83) return true; + if (c == 0x0ABC) return true; + + if (c < 0x0ABE) return false; if (c <= 0x0AC5) return true; + if (c < 0x0AC7) return false; if (c <= 0x0AC9) return true; + if (c < 0x0ACB) return false; if (c <= 0x0ACD) return true; + + if (c < 0x0B01) return false; if (c <= 0x0B03) return true; + if (c == 0x0B3C) return true; + if (c < 0x0B3E) return false; if (c <= 0x0B43) return true; + if (c < 0x0B47) return false; if (c <= 0x0B48) return true; + + if (c < 0x0B4B) return false; if (c <= 0x0B4D) return true; + if (c < 0x0B56) return false; if (c <= 0x0B57) return true; + if (c < 0x0B82) return false; if (c <= 0x0B83) return true; + + if (c < 0x0BBE) return false; if (c <= 0x0BC2) return true; + if (c < 0x0BC6) return false; if (c <= 0x0BC8) return true; + if (c < 0x0BCA) return false; if (c <= 0x0BCD) return true; + if (c == 0x0BD7) return true; + + if (c < 0x0C01) return false; if (c <= 0x0C03) return true; + if (c < 0x0C3E) return false; if (c <= 0x0C44) return true; + if (c < 0x0C46) return false; if (c <= 0x0C48) return true; + + if (c < 0x0C4A) return false; if (c <= 0x0C4D) return true; + if (c < 0x0C55) return false; if (c <= 0x0C56) return true; + if (c < 0x0C82) return false; if (c <= 0x0C83) return true; + + if (c < 0x0CBE) return false; if (c <= 0x0CC4) return true; + if (c < 0x0CC6) return false; if (c <= 0x0CC8) return true; + if (c < 0x0CCA) return false; if (c <= 0x0CCD) return true; + + if (c < 0x0CD5) return false; if (c <= 0x0CD6) return true; + if (c < 0x0D02) return false; if (c <= 0x0D03) return true; + if (c < 0x0D3E) return false; if (c <= 0x0D43) return true; + + if (c < 0x0D46) return false; if (c <= 0x0D48) return true; + if (c < 0x0D4A) return false; if (c <= 0x0D4D) return true; + if (c == 0x0D57) return true; + if (c == 0x0E31) return true; + + if (c < 0x0E34) return false; if (c <= 0x0E3A) return true; + if (c < 0x0E47) return false; if (c <= 0x0E4E) return true; + if (c == 0x0EB1) return true; + if (c < 0x0EB4) return false; if (c <= 0x0EB9) return true; + + if (c < 0x0EBB) return false; if (c <= 0x0EBC) return true; + if (c < 0x0EC8) return false; if (c <= 0x0ECD) return true; + if (c < 0x0F18) return false; if (c <= 0x0F19) return true; + if (c == 0x0F35) return true; + + if (c == 0x0F37) return true; + if (c == 0x0F39) return true; + if (c == 0x0F3E) return true; + if (c == 0x0F3F) return true; + if (c < 0x0F71) return false; if (c <= 0x0F84) return true; + + if (c < 0x0F86) return false; if (c <= 0x0F8B) return true; + if (c < 0x0F90) return false; if (c <= 0x0F95) return true; + if (c == 0x0F97) return true; + if (c < 0x0F99) return false; if (c <= 0x0FAD) return true; + + if (c < 0x0FB1) return false; if (c <= 0x0FB7) return true; + if (c == 0x0FB9) return true; + if (c < 0x20D0) return false; if (c <= 0x20DC) return true; + if (c == 0x20E1) return true; + + if (c < 0x302A) return false; if (c <= 0x302F) return true; + if (c == 0x3099) return true; + if (c == 0x309A) return true; + + return false; + + } + + /** + * This is a utility function for determining whether a specified + * character is an extender according to production 88 of the XML 1.0 + * specification. + * + * @param c <code>char</code> to check. + * @return <code>String</code> true if it's an extender, false otherwise. + */ + public static boolean isXMLExtender(char c) { + + if (c < 0x00B6) return false; // quick short circuit + + // Extenders + if (c == 0x00B7) return true; + if (c == 0x02D0) return true; + if (c == 0x02D1) return true; + if (c == 0x0387) return true; + if (c == 0x0640) return true; + if (c == 0x0E46) return true; + if (c == 0x0EC6) return true; + if (c == 0x3005) return true; + + if (c < 0x3031) return false; if (c <= 0x3035) return true; + if (c < 0x309D) return false; if (c <= 0x309E) return true; + if (c < 0x30FC) return false; if (c <= 0x30FE) return true; + + return false; + + } + + /** + * This is a utility function for determining whether a specified + * Unicode character + * is a digit according to production 88 of the XML 1.0 specification. + * + * @param c <code>char</code> to check for XML digit compliance + * @return <code>boolean</code> true if it's a digit, false otherwise + */ + public static boolean isXMLDigit(char c) { + + if (c < 0x0030) return false; if (c <= 0x0039) return true; + if (c < 0x0660) return false; if (c <= 0x0669) return true; + if (c < 0x06F0) return false; if (c <= 0x06F9) return true; + if (c < 0x0966) return false; if (c <= 0x096F) return true; + + if (c < 0x09E6) return false; if (c <= 0x09EF) return true; + if (c < 0x0A66) return false; if (c <= 0x0A6F) return true; + if (c < 0x0AE6) return false; if (c <= 0x0AEF) return true; + + if (c < 0x0B66) return false; if (c <= 0x0B6F) return true; + if (c < 0x0BE7) return false; if (c <= 0x0BEF) return true; + if (c < 0x0C66) return false; if (c <= 0x0C6F) return true; + + if (c < 0x0CE6) return false; if (c <= 0x0CEF) return true; + if (c < 0x0D66) return false; if (c <= 0x0D6F) return true; + if (c < 0x0E50) return false; if (c <= 0x0E59) return true; + + if (c < 0x0ED0) return false; if (c <= 0x0ED9) return true; + if (c < 0x0F20) return false; if (c <= 0x0F29) return true; + + return false; + } + + /** + * This is a utility function for determining whether a specified + * Unicode character is a whitespace character according to production 3 + * of the XML 1.0 specification. + * + * @param c <code>char</code> to check for XML whitespace compliance + * @return <code>boolean</code> true if it's a whitespace, false otherwise + */ + public static boolean isXMLWhitespace(char c) { + if (c==' ' || c=='\n' || c=='\t' || c=='\r' ){ + return true; + } + return false; + } +}