/*
 * (C) Copyright IBM Corp. 1997-1998  All rights reserved.
 *
 * US Government Users Restricted Rights Use, duplication or
 * disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
 *
 * The program is provided "as is" without any warranty express or
 * implied, including the warranty of non-infringement and the implied
 * warranties of merchantibility and fitness for a particular purpose.
 * IBM will not be liable for any damages suffered by you as a result
 * of using the Program. In no event will IBM be liable for any
 * special, indirect or consequential damages or lost profits even if
 * IBM has been advised of the possibility of their occurrence. IBM
 * will not be liable for any third party claims against you.
 */

package com.ibm.xml.parser;

import java.util.Vector;
import java.io.IOException;

/**
 * Util is a collection of XML4J utility routines which check the conformance of various 
 * XML-defined values (XML name, language ID, encoding ID), and which provide services for 
 * converting strings to XML format. 
 *
 * @version Revision: 42 1.14 src/com/ibm/xml/parser/Util.java, xml4jsrc, xml4j-jtcsv, xml4j_1_1_16 
 * @author TAMURA Kent &lt;kent@trl.ibm.co.jp&gt;
 */
public class Util {
    
    /**
     * Tests whether <var>name</var> matches the <CODE>Name</CODE> production of XML 1.0:
     * the first character must be a letter, <CODE>'_'</CODE> or <CODE>':'</CODE>, and every
     * following character must be a name character.
     * Refer to <A href="http://www.w3.org/TR/1998/REC-xml-19980210#NT-Name">
     * the definition of <CODE>Name</CODE></A> for details.
     * @param   name    String to be tested; a <CODE>null</CODE> argument causes a
     *                  <CODE>NullPointerException</CODE>.
     * @return          <CODE>true</CODE> if <var>name</var> is a valid XML Name;
     *                  <CODE>false</CODE> otherwise, including for the empty string.
     */
    public static boolean checkName(String name) {
        final int length = name.length();
        if (length == 0) {
            return false;
        }

        // The leading character has a narrower set of legal values than the rest.
        final char first = name.charAt(0);
        final boolean validStart = XMLChar.isLetter(first) || first == '_' || first == ':';
        if (!validStart) {
            return false;
        }

        int index = 1;
        while (index < length) {
            if (!XMLChar.isNameChar(name.charAt(index))) {
                return false;
            }
            index++;
        }
        return true;
    }

    /**
     * Tests whether <var>name</var> matches the <CODE>NCName</CODE> ("no-colon name")
     * production of `Namespaces in XML': the first character must be a letter or
     * <CODE>'_'</CODE>, and every following character must be a name character other
     * than <CODE>':'</CODE>.
     * Refer to <A href="http://www.w3.org/TR/1998/WD-xml-names-19980802.html#NT-NCName">
     * the definition of <CODE>NCName</CODE></A> for details.
     * @param   name    String to be tested; a <CODE>null</CODE> argument causes a
     *                  <CODE>NullPointerException</CODE>.
     * @return          <CODE>true</CODE> if <var>name</var> is a valid NCName;
     *                  <CODE>false</CODE> otherwise, including for the empty string.
     */
    public static boolean checkNCName(String name) {
        final int length = name.length();
        if (length == 0) {
            return false;
        }

        final int first = name.charAt(0);
        if (!XMLChar.isLetter(first) && first != '_') {
            return false;
        }

        for (int index = 1;  index < length;  index++) {
            final int current = name.charAt(index);
            // A colon is a legal name character in general, but not inside an NCName.
            if (current == ':') {
                return false;
            }
            if (!XMLChar.isNameChar(current)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tests whether <var>nmtoken</var> matches the <CODE>Nmtoken</CODE> production of XML 1.0,
     * i.e. it is a non-empty sequence of name characters.
     * Refer to <A href="http://www.w3.org/TR/1998/REC-xml-19980210#NT-Nmtoken">
     * the definition of <CODE>Nmtoken</CODE></A> for details.
     * @param   nmtoken String to be tested; a <CODE>null</CODE> argument causes a
     *                  <CODE>NullPointerException</CODE>.
     * @return          <CODE>true</CODE> if <var>nmtoken</var> is a valid Nmtoken;
     *                  <CODE>false</CODE> otherwise, including for the empty string.
     */
    public static boolean checkNmtoken(String nmtoken) {
        final int length = nmtoken.length();
        if (length == 0) {
            return false;
        }

        // Unlike a Name, a Nmtoken places no extra restriction on its first character.
        int index = 0;
        while (index < length) {
            if (!XMLChar.isNameChar(nmtoken.charAt(index))) {
                return false;
            }
            index++;
        }
        return true;
    }

    /**
     * Tests whether every character of <var>string</var> is XML whitespace.
     * Refer to <A href="http://www.w3.org/TR/1998/REC-xml-19980210#NT-S">
     * the definition of <CODE>S</CODE></A> for details.
     * @param   string  String to be tested; a <CODE>null</CODE> argument causes a
     *                  <CODE>NullPointerException</CODE>.
     * @return          <CODE>true</CODE> if <var>string</var> contains no character other
     *                  than XML whitespace (so the empty string yields <CODE>true</CODE>);
     *                  <CODE>false</CODE> otherwise.
     */
    public static boolean checkAllSpace(String string) {
        final int length = string.length();
        int index = 0;
        while (index < length) {
            if (!XMLChar.isSpace(string.charAt(index))) {
                return false;
            }
            index++;
        }
        return true;
    }

    /**
     * Tests whether <var>xmlEncoding</var> matches the <CODE>EncName</CODE> production of
     * XML 1.0: an ASCII letter followed by any number of ASCII letters, digits,
     * <CODE>'.'</CODE>, <CODE>'_'</CODE> or <CODE>'-'</CODE>.
     * Refer to <A href="http://www.w3.org/TR/1998/REC-xml-19980210#NT-EncName">
     * the definition of <CODE>EncName</CODE></A> for details.
     * <p>Note that a syntactically valid <var>xmlEncoding</var> does not
     * imply the encoding is supported by XML4J.
     * @param   xmlEncoding String to be tested; a <CODE>null</CODE> argument causes a
     *                      <CODE>NullPointerException</CODE>.
     * @return              <CODE>true</CODE> if <var>xmlEncoding</var> is a well-formed
     *                      encoding name; <CODE>false</CODE> otherwise, including for the
     *                      empty string.
     * @see com.ibm.xml.parser.TXDocument#setEncoding
     */
    public static boolean checkEncoding(String xmlEncoding) {
        final int length = xmlEncoding.length();
        if (length == 0) {
            return false;
        }

        for (int pos = 0;  pos < length;  pos++) {
            final char c = xmlEncoding.charAt(pos);
            final boolean asciiLetter = ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
            if (asciiLetter) {
                continue;               // letters are legal at every position
            }
            if (pos == 0) {
                return false;           // only a letter may open the name
            }
            final boolean digit = '0' <= c && c <= '9';
            if (!digit && c != '.' && c != '_' && c != '-') {
                return false;
            }
        }
        return true;
    }

    /**
     * Tests whether <var>languageID</var> matches the <CODE>LanguageID</CODE> production of
     * XML 1.0:
     * <pre>
     *   LanguageID ::= Langcode ('-' Subcode)*
     *   Langcode   ::= ISO639Code | IanaCode | UserCode
     *   ISO639Code ::= [a-zA-Z] [a-zA-Z]
     *   IanaCode   ::= [iI] '-' [a-zA-Z]+
     *   UserCode   ::= [xX] '-' [a-zA-Z]+
     *   Subcode    ::= [a-zA-Z]+
     * </pre>
     * Refer to <A href="http://www.w3.org/TR/1998/REC-xml-19980210#sec-lang-tag">
     * XML 1.0 / 2.12 Language Identification</A> for details.
     * @param   languageID  String to be tested; a <CODE>null</CODE> argument causes a
     *                      <CODE>NullPointerException</CODE>.
     * @return              <CODE>true</CODE> if <var>languageID</var> is a well-formed
     *                      language ID; <CODE>false</CODE> otherwise.
     * @see com.ibm.xml.parser.TXElement#getLanguage
     * @see com.ibm.xml.parser.TXText#getLanguage
     * @see com.ibm.xml.parser.GeneralReference#getLanguage
     */
    public static boolean checkLanguageID(String languageID) {
        final int length = languageID.length();
        if (length < 2) {
            return false;               // every Langcode needs at least two characters
        }
        final char first = languageID.charAt(0);
        final char second = languageID.charAt(1);

        // Work out where the first "'-' letters" group has to start.
        int pos;
        if (isLanguageLetter(first) && isLanguageLetter(second)) {
            // ISO639Code: two letters; any subcodes after them are optional.
            pos = 2;
        } else if (second == '-'
                   && (first == 'i' || first == 'I' || first == 'x' || first == 'X')) {
            // IanaCode / UserCode: the mandatory "'-' letters" tail has the same shape as
            // a subcode, so it is validated by the loop below.  Since pos < length here,
            // the loop always runs and at least one group is enforced.
            pos = 1;
        } else {
            return false;
        }

        // Each iteration consumes one "'-' [a-zA-Z]+" group.
        while (pos < length) {
            if (length - pos < 2) {
                return false;           // no room for '-' plus one letter
            }
            if (languageID.charAt(pos) != '-'
                || !isLanguageLetter(languageID.charAt(pos + 1))) {
                return false;
            }
            pos += 2;
            while (pos < length && isLanguageLetter(languageID.charAt(pos))) {
                pos++;
            }
        }
        return true;
    }

    /**
     * Tells whether <var>c</var> is an ASCII letter (<CODE>[a-zA-Z]</CODE>), the only kind
     * of letter allowed in a language ID.
     */
    private static boolean isLanguageLetter(char c) {
        return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
    }

    /**
     * Tests whether <var>versionNum</var> matches the <code>VersionNum</code> production of
     * XML 1.0: a non-empty sequence of ASCII letters, digits, <code>'_'</code>,
     * <code>'.'</code>, <code>':'</code> or <code>'-'</code>.
     * Refer to <A href="http://www.w3.org/TR/1998/REC-xml-19980210#NT-VersionNum">
     * the definition of <code>VersionNum</code></A> for details.
     * @param   versionNum  String to be tested; a <code>null</code> argument causes a
     *                      <code>NullPointerException</code>.
     * @return              <code>true</code> if <var>versionNum</var> is a well-formed
     *                      version number; <code>false</code> otherwise, including for the
     *                      empty string.
     */
    public static boolean checkVersionNum(String versionNum) {
        final int length = versionNum.length();
        if (length == 0) {
            return false;
        }

        for (int pos = 0;  pos < length;  pos++) {
            final char c = versionNum.charAt(pos);
            final boolean alphanumeric = ('a' <= c && c <= 'z')
                                         || ('A' <= c && c <= 'Z')
                                         || ('0' <= c && c <= '9');
            if (alphanumeric) {
                continue;
            }
            final boolean punctuation = c == '_' || c == '.' || c == ':' || c == '-';
            if (!punctuation) {
                return false;
            }
        }
        return true;
    }

    /**
     * Finds the first character of <var>uri</var> that is outside the character set accepted
     * here: ASCII letters and digits, the punctuation
     * <code>; / ? : @ &amp; = + $ , - _ . ! ~ * ' ( )</code> and the escape introducer
     * <code>%</code>.  A <code>'#'</code> is reported as invalid, so URI references
     * carrying a fragment are rejected.  Only individual characters are examined; the
     * overall URI structure and the digits following <code>%</code> are not checked.
     * Refer to RFC2396 for details.
     * @param   uri     URI to be examined; a <code>null</code> argument causes a
     *                  <code>NullPointerException</code>.
     * @return          0-based index of the first rejected character, or <code>-1</code>
     *                  if every character is accepted (also for the empty string).
     */
    public static int getInvalidURIChar(String uri) {
        final int length = uri.length();
        for (int pos = 0;  pos < length;  pos++) {
            final char c = uri.charAt(pos);
            if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9')) {
                continue;
            }
            switch (c) {
            // "reserved" characters
            case ';': case '/': case '?': case ':': case '@':
            case '&': case '=': case '+': case '$': case ',':
            // "mark" characters
            case '-': case '_': case '.': case '!': case '~':
            case '*': case '\'': case '(': case ')':
            // escape introducer
            case '%':
                break;
            default:
                return pos;
            }
        }
        return -1;
    }

    /**
     * Tells whether <var>uri</var> looks like a URN, i.e. starts with <code>"urn:"</code>
     * (compared without regard to case) and has at least one more character after it.
     * @param   uri     String to be tested; a <code>null</code> argument causes a
     *                  <code>NullPointerException</code>.
     * @return          <code>true</code> if <var>uri</var> is longer than four characters
     *                  and begins with <code>"urn:"</code> in any letter case;
     *                  <code>false</code> otherwise.
     */
    public static boolean isURN(String uri) {
        final String scheme = "urn:";
        if (uri.length() <= scheme.length()) {
            return false;               // the bare prefix (or anything shorter) is not a URN
        }
        return uri.regionMatches(true, 0, scheme, 0, scheme.length());
    }

    /**
     * Returns <var>urn</var> with its <code>"urn:"</code> prefix and &lt;NID&gt; part
     * lower-cased; the namespace-specific string after the NID is left untouched.
     * Refer to RFC2141.
     * <p>The text in front of the first <code>':'</code> found at index 4 or later is
     * treated as prefix plus NID.  Lower-casing uses a fixed locale, so the result does not
     * depend on the default locale of the JVM (under a Turkish default locale a plain
     * <code>toLowerCase()</code> would map <code>'I'</code> to a dotless i).
     * @param   urn     URN to normalize; a <code>null</code> argument causes a
     *                  <code>NullPointerException</code>.
     * @return          The normalized URN.  <var>urn</var> itself is returned when the part to
     *                  be normalized holds no ASCII upper-case letter, or when no
     *                  <code>':'</code> exists at index 4 or later (probably an invalid URN).
     */
    public static String normalizeURN(String urn) {
        final int length = urn.length();
        boolean sawUpperCase = false;
        for (int i = 0;  i < length;  i++) {
            final char ch = urn.charAt(i);
            if ('A' <= ch && ch <= 'Z') {
                sawUpperCase = true;
            } else if (i >= 4 && ch == ':') {
                // This colon terminates the NID; everything after it is case-sensitive.
                if (!sawUpperCase) {
                    return urn;
                }
                return urn.substring(0, i).toLowerCase(java.util.Locale.ENGLISH)
                       + urn.substring(i);
            }
        }
        // Maybe specified urn is invalid.
        return urn;
    }

    /**
     * Returns a copy of <var>string</var> in which <code>&lt;</code>, <code>&gt;</code> and
     * <code>&amp;</code> are replaced by the general entity references
     * <code>&amp;lt;</code>, <code>&amp;gt;</code> and <code>&amp;amp;</code>, and every
     * UTF-16 surrogate pair is replaced by a single hexadecimal character reference
     * (<code>&amp;#x...;</code>) for the code point the pair encodes.
     * Quotation marks and apostrophes are copied unchanged, as is a low surrogate that is
     * not preceded by a high surrogate.
     * <p>This routine can be used by all DOM and XML4J objects EXCEPT Entity in order to
     * represent their contents in XML format.
     * @param   string      String to convert to XML format; a <code>null</code> argument
     *                      causes a <code>NullPointerException</code>.
     * @param   encoding    CURRENTLY NOT IMPLEMENTED; the value is ignored.
     * @return              XML-formatted string.
     * @exception LibraryException  Thrown if a high surrogate (0xD800-0xDBFF) is the last
     *                      character of <var>string</var> or is not followed by a low
     *                      surrogate (0xDC00-0xDFFF).
     * @see #backReferenceForEntity
     */
    public static String backReference(String string, String encoding) {
        final int length = string.length();
        // Reserve about 20% headroom for the inserted references.  The size is computed in
        // long arithmetic: length*12 wraps negative in int for inputs above roughly 179
        // million characters, and a negative capacity makes the StringBuffer constructor fail.
        final long padded = (long)length*12/10;
        final StringBuffer sb = new StringBuffer(padded > Integer.MAX_VALUE ? length : (int)padded);
        for (int i = 0;  i < length;  i ++) {
            final char ch = string.charAt(i);
            if ('<' == ch) {
                sb.append("&lt;");
            } else if ('>' == ch) {
                sb.append("&gt;");
            } else if ('&' == ch) {
                sb.append("&amp;");
            // Note: '"' and '\'' are intentionally NOT turned into &quot; / &apos; here.
            } else if (0xd800 <= ch && ch < 0xdc00) { // UTF-16 high surrogate
                if (i+1 >= length) {
                    throw new LibraryException("com.ibm.xml.parser.Util#backReference(): Invalid UTF-16 surrogate detected: "
                                               +Integer.toHexString(ch)+ " ?");
                }
                final int low = string.charAt(++i);   // consume the second half of the pair
                if (!(0xdc00 <= low && low < 0xe000))
                    throw new LibraryException("com.ibm.xml.parser.Util#backReference(): Invalid UTF-16 surrogate detected: "
                                               +Integer.toHexString(ch)+" "+Integer.toHexString(low));
                // Combine the two 10-bit halves and add the supplementary-plane offset.
                final int codePoint = ((ch-0xd800)<<10)+low-0xdc00+0x00010000;
                sb.append("&#x");
                sb.append(Integer.toHexString(codePoint));
                sb.append(";");
            // Note: characters the target encoding cannot represent are not escaped,
            // because the encoding parameter is not implemented.
            } else {
                sb.append(ch);
            }
        }
        return sb.toString();
    }

    /**
     * Returns a copy of <var>string</var> in which the characters <code>&amp;, &quot;, &apos;,
     * %, CR, LF</code> and <code>TAB</code> are replaced by the decimal character references
     * <code>&amp;#38;, &amp;#34;, &amp;#39;, &amp;#37;, &amp;#13;, &amp;#10;</code> and
     * <code>&amp;#9;</code> respectively, and every UTF-16 surrogate pair is replaced by a
     * hexadecimal character reference (<code>&amp;#x...;</code>).
     * <p>This routine can be used by Entity objects in order to represent their contents in
     * XML format.
     * @param   string      String to convert to XML format.
     * @param   encoding    CURRENTLY NOT IMPLEMENTED.
     * @return              XML-formatted string.
     * @see #backReference
     */
    public static String backReferenceForEntity(String string, String encoding) {
        // Characters that must not appear literally inside an entity value.
        final String entitySpecials = "&\"'%\r\n\t";
        return backReference(string, entitySpecials, encoding);
    }

    /**
     * Returns a copy of <var>string</var> in which every character that occurs in
     * <VAR>specials</VAR> is replaced by a decimal character reference
     * (<CODE>&amp;#nn;</CODE>), and every UTF-16 surrogate pair is replaced by a single
     * hexadecimal character reference (<CODE>&amp;#x...;</CODE>) for the code point the pair
     * encodes.  All other characters, including a low surrogate that is not preceded by a
     * high surrogate, are copied unchanged.
     *
     * @param   string      String to convert to XML format; a <CODE>null</CODE> argument
     *                      causes a <CODE>NullPointerException</CODE>.
     * @param   specials    Characters that should be represented as character references;
     *                      a <CODE>null</CODE> argument causes a
     *                      <CODE>NullPointerException</CODE> for a non-empty
     *                      <var>string</var>.
     * @param   encoding    CURRENTLY NOT IMPLEMENTED; the value is ignored.
     * @return              XML-formatted string.
     * @exception LibraryException  Thrown if a high surrogate (0xD800-0xDBFF) that is not
     *                      listed in <var>specials</var> is the last character of
     *                      <var>string</var> or is not followed by a low surrogate
     *                      (0xDC00-0xDFFF).
     * @see #backReference
     */
    public static String backReference(String string, String specials, String encoding) {
        final int length = string.length();
        // Reserve about 20% headroom for the inserted references.  The size is computed in
        // long arithmetic: length*12 wraps negative in int for inputs above roughly 179
        // million characters, and a negative capacity makes the StringBuffer constructor fail.
        final long padded = (long)length*12/10;
        final StringBuffer sb = new StringBuffer(padded > Integer.MAX_VALUE ? length : (int)padded);
        for (int i = 0;  i < length;  i ++) {
            final char ch = string.charAt(i);
            if (specials.indexOf(ch) >= 0) {
                // Caller-designated special character: decimal reference.
                sb.append("&#");
                sb.append(Integer.toString(ch));
                sb.append(";");
            } else if (0xd800 <= ch && ch < 0xdc00) { // UTF-16 high surrogate
                if (i+1 >= length) {
                    throw new LibraryException("com.ibm.xml.parser.Util#backReference(): Invalid UTF-16 surrogate detected: "
                                               +Integer.toHexString(ch)+ " ?");
                }
                final int low = string.charAt(++i);   // consume the second half of the pair
                if (!(0xdc00 <= low && low < 0xe000))
                    throw new LibraryException("com.ibm.xml.parser.Util#backReference(): Invalid UTF-16 surrogate detected: "
                                               +Integer.toHexString(ch)+" "+Integer.toHexString(low));
                // Combine the two 10-bit halves and add the supplementary-plane offset.
                final int codePoint = ((ch-0xd800)<<10)+low-0xdc00+0x00010000;
                sb.append("&#x");
                sb.append(Integer.toHexString(codePoint));
                sb.append(";");
            // Note: characters the target encoding cannot represent are not escaped,
            // because the encoding parameter is not implemented.
            } else {
                sb.append(ch);
            }
        }
        return sb.toString();
    }

    /**
     * Usage of sun.io package is not allowed in Pure Java.
     *
     * @param enc Java encoding name
     * @return CharToByteConverter an instance for enc, or null.
    private static sun.io.CharToByteConverter getCharToByteConverter(String enc) {
        sun.io.CharToByteConverter ctbc = null;
        if (null != enc) {
            try {
                ctbc = sun.io.CharToByteConverter.getConverter(enc);
            } catch (UnsupportedEncodingException e) {
            }
        }
        return ctbc;
    }
     */

    /**
     * Writes <var>n</var> space characters to <var>pw</var>, one at a time.
     * Nothing is written when <var>n</var> is zero or negative.
     * @param pw        The character output stream to use.
     * @param n         Number of spaces to print.
     * @exception IOException   Thrown if writing to <var>pw</var> fails.
     */
    public static void printSpace(java.io.Writer pw, int n) throws IOException {
        int remaining = n;
        while (remaining > 0) {
            pw.write(" ");
            remaining--;
        }
    }

    /**
     * Starts a new output line and indents it: writes one newline character
     * (<code>"\n"</code>) to <var>pw</var>, followed by <var>n</var> spaces.
     * @param pw        The character output stream to use.
     * @param n         Number of spaces to print after the newline.
     * @exception IOException   Thrown if writing to <var>pw</var> fails.
     */
    public static void indent(java.io.Writer pw, int n) throws IOException {
        final String lineBreak = "\n";
        pw.write(lineBreak);
        printSpace(pw, n);
    }

    /**
     * Sorts a vector of strings in place into ascending
     * <CODE>String#compareTo()</CODE> order.  The elements are copied into a
     * <CODE>String</CODE> array, sorted there, and written back into the same vector.
     *
     * @param   vector  The vector to be sorted; its elements are copied into a
     *                  <CODE>String[]</CODE>, so they are expected to be strings.
     * @return          The same <var>vector</var> instance, now sorted.
     * @see java.lang.String#compareTo
     */
    public static Vector sortStringVector(Vector vector) {
        final int count = vector.size();
        final String[] sorted = new String[count];
        vector.copyInto(sorted);
        heapSort(sorted);

        // Refill the caller's vector with the elements in their new order.
        vector.removeAllElements();
        vector.ensureCapacity(count);
        int next = 0;
        while (next < count) {
            vector.addElement(sorted[next]);
            next++;
        }
        return vector;
    }

    /**
     * Sorts the whole of <var>pd</var> in place into ascending
     * <CODE>String#compareTo()</CODE> order.
     * @param pd    Array to be sorted; it must not contain <CODE>null</CODE> elements
     *              that take part in a comparison.
     */
    public static void heapSort(String[] pd) {
        final int count = pd.length;
        heapSort(pd, count);
    }

    /**
     * Sorts the first <var>length</var> elements of <var>pd</var> in place into ascending
     * <CODE>String#compareTo()</CODE> order using heap sort; elements from index
     * <var>length</var> onwards are left untouched.
     * @param pd        Array holding the elements to be sorted.
     * @param length    Number of leading elements to sort; should not exceed
     *                  <CODE>pd.length</CODE>.
     */
    public static void heapSort(String[] pd, int length) {
        // Phase 1: arrange the prefix as a max-heap by sifting down every candidate parent.
        int root = length/2;
        while (root >= 0) {
            fall(pd, length, root);
            root--;
        }

        // Phase 2: repeatedly move the largest element (heap root) behind the shrinking
        // heap, then restore the heap property for the remaining elements.
        int last = length-1;
        while (last > 0) {
            final String largest = pd[0];
            pd[0] = pd[last];
            pd[last] = largest;
            fall(pd, last, 0);
            last--;
        }
    }

    /**
     * Sifts the element at index <var>i</var> down the max-heap stored in the first
     * <var>n</var> slots of <var>pd</var> until neither of its children is bigger.
     */
    static private void fall(String[] pd, int n, int i) {
        int parent = i;
        int child = 2*parent+1;                 // left child
        while (child < n) {
            final int right = child+1;
            // Pick the bigger of the two children when a right child exists.
            if (right < n && pd[child].compareTo(pd[right]) < 0) {
                child = right;
            }
            if (pd[parent].compareTo(pd[child]) >= 0) {
                return;                         // heap property already holds here
            }
            final String moved = pd[parent];
            pd[parent] = pd[child];
            pd[child] = moved;
            parent = child;
            child = 2*parent+1;
        }
    }
}
