6945564: Unicode script support in Character class

6948903: Make Unicode scripts available for use in regular expressions. Added Unicode script support. Reviewed-by: martin
2026-02-16 13:25:34 +00:00 · 2010-05-18 15:36:47 -07:00 · 2010-05-18 15:36:47 -07:00 · bb2be852aa
commit bb2be852aa
parent d1dc8092ec
12 changed files with 5938 additions and 20 deletions
--- a/jdk/make/java/java/FILES_java.gmk
+++ b/jdk/make/java/java/FILES_java.gmk
@ -34,6 +34,7 @@ JAVA_JAVA_java = \
    java/lang/Thread.java \
    java/lang/Character.java \
    java/lang/CharacterData.java \
+    java/lang/CharacterName.java \
    sun/misc/ASCIICaseInsensitiveComparator.java \
    sun/misc/VM.java \
    sun/misc/Signal.java \
--- a/jdk/make/java/java/Makefile
+++ b/jdk/make/java/java/Makefile
@ -384,6 +384,27 @@ clean::
 	$(RM) $(GENSRCDIR)/java/lang/CharacterDataUndefined.java 
 	$(RM) $(GENSRCDIR)/java/lang/CharacterDataPrivateUse.java

+#
+# Rules to generate classes/java/lang/uniName.dat
+#
+
+
+
+# Output location of the generated character name table.
+UNINAME = $(CLASSBINDIR)/java/lang/uniName.dat
+# NOTE(review): GENERATEUNINAME_JARFILE is not referenced by the rules visible
+# here, which use GENERATECHARACTER_JARFILE instead -- confirm which is intended.
+GENERATEUNINAME_JARFILE = $(BUILDTOOLJARDIR)/generatecharacter.jar
+
+# Make the name table part of the normal build.
+build: $(UNINAME)
+
+# Run build.tools.generatecharacter.CharacterName on UnicodeData.txt to
+# produce $(UNINAME); rebuilt when the data file or the tool jar changes.
+$(UNINAME): $(UNICODEDATA)/UnicodeData.txt \
+	$(GENERATECHARACTER_JARFILE)
+	@$(prep-target)
+	$(BOOT_JAVA_CMD) -classpath $(GENERATECHARACTER_JARFILE) \
+	build.tools.generatecharacter.CharacterName \
+		$(UNICODEDATA)/UnicodeData.txt $(UNINAME)
+
+# Remove the generated name table.
+clean:: 
+	$(RM) $(UNINAME)
+
 #
 # End of rules to create $(GENSRCDIR)/java/lang/CharacterDataXX.java
 #
--- a/jdk/make/tools/UnicodeData/Scripts.txt
+++ b/jdk/make/tools/UnicodeData/Scripts.txt
--- a/jdk/make/tools/src/build/tools/generatecharacter/CharacterName.java
+++ b/jdk/make/tools/src/build/tools/generatecharacter/CharacterName.java
@ -0,0 +1,100 @@
+package build.tools.generatecharacter;
+
+import java.io.*;
+import java.nio.*;
+import java.util.*;
+import java.util.zip.*;
+
+/**
+ * Build tool that converts UnicodeData.txt into the deflated name table
+ * (uniName.dat) read by java.lang.CharacterName.
+ *
+ * The uncompressed layout is:
+ *   int   total    size of the code point pool plus the name pool
+ *   int   cpLen    size of the code point pool (i.e. offset of the name pool)
+ *   byte[cpLen]    code point pool
+ *   byte[...]      ASCII name pool, names concatenated in file order
+ *
+ * Each code point pool entry is either a single non-zero byte (the name
+ * length of the code point following the previous one) or a zero byte
+ * followed by a big-endian int holding (nameLength << 24) | codePoint, which
+ * starts a new run of consecutive code points.
+ */
+public class CharacterName {
+
+    /** Largest name length that fits in the one-byte length field. */
+    private static final int MAX_NAME_LENGTH = 0xff;
+
+    /**
+     * Entry point.
+     *
+     * @param args args[0] is the path of UnicodeData.txt, args[1] is the path
+     *             of the uniName.dat file to create. The process exits with a
+     *             non-zero status on bad usage or on any failure so that the
+     *             calling build rule fails as well.
+     */
+    public static void main(String[] args) {
+        if (args.length != 2) {
+            System.err.println("Usage: java CharacterName UnicodeData.txt uniName.dat");
+            System.exit(1);
+        }
+
+        BufferedReader bfr = null;
+        DataOutputStream dos = null;
+        boolean failed = false;
+        try {
+            bfr = new BufferedReader(new FileReader(args[0]));
+
+            StringBuilder namePool = new StringBuilder();
+            byte[] cpPoolBytes = new byte[0x100000];
+            ByteBuffer cpBB = ByteBuffer.wrap(cpPoolBytes);
+            int lastCp = 0;
+
+            String line;
+            while ((line = bfr.readLine()) != null) {
+                if (line.startsWith("#"))
+                    continue;
+                UnicodeSpec spec = UnicodeSpec.parse(line);
+                if (spec == null)
+                    continue;
+
+                int cp = spec.getCodePoint();
+                String name = spec.getName();
+                if (name.equals("<control>") && spec.getOldName() != null) {
+                    // Control characters: use the old name, skip if there is none.
+                    if (spec.getOldName().length() == 0)
+                        continue;
+                    name = spec.getOldName();
+                } else if (name.startsWith("<")) {
+                    /*
+                      Range markers carry no individual names and are skipped:
+                      3400    <CJK Ideograph Extension A, First>
+                      4db5    <CJK Ideograph Extension A, Last>
+                      4e00    <CJK Ideograph, First>
+                      9fc3    <CJK Ideograph, Last>
+                      ac00    <Hangul Syllable, First>
+                      d7a3    <Hangul Syllable, Last>
+                      d800    <Non Private Use High Surrogate, First>
+                      db7f    <Non Private Use High Surrogate, Last>
+                      db80    <Private Use High Surrogate, First>
+                      dbff    <Private Use High Surrogate, Last>
+                      dc00    <Low Surrogate, First>
+                      dfff    <Low Surrogate, Last>
+                      e000    <Private Use, First>
+                      f8ff    <Private Use, Last>
+                     20000    <CJK Ideograph Extension B, First>
+                     2a6d6    <CJK Ideograph Extension B, Last>
+                     f0000    <Plane 15 Private Use, First>
+                     ffffd    <Plane 15 Private Use, Last>
+                    */
+                    continue;
+                }
+
+                // A length of 0 is the segment start flag and the field is
+                // one byte wide, so anything outside 1..255 would corrupt
+                // the table silently.
+                int len = name.length();
+                if (len == 0 || len > MAX_NAME_LENGTH) {
+                    throw new IllegalStateException(
+                        "Name length " + len + " of code point "
+                        + Integer.toHexString(cp) + " cannot be encoded");
+                }
+
+                if (cp == lastCp + 1) {
+                    cpBB.put((byte)len);
+                } else {
+                    cpBB.put((byte)0);  // segment start flag
+                    cpBB.putInt((len << 24) | (cp & 0xffffff));
+                }
+                namePool.append(name);
+                lastCp = cp;
+            }
+
+            byte[] namePoolBytes = namePool.toString().getBytes("ASCII");
+            int cpLen = cpBB.position();
+            int total = cpLen + namePoolBytes.length;
+
+            dos = new DataOutputStream(
+                      new DeflaterOutputStream(
+                          new FileOutputStream(args[1])));
+            dos.writeInt(total);  // total
+            dos.writeInt(cpLen);  // nameOff
+            dos.write(cpPoolBytes, 0, cpLen);
+            dos.write(namePoolBytes);
+            // Close explicitly so that a failure to finish the deflater
+            // stream is reported rather than swallowed in the finally block.
+            dos.close();
+            dos = null;
+        } catch (Throwable e) {
+            System.out.println("Unexpected exception:");
+            e.printStackTrace();
+            failed = true;
+        } finally {
+            closeQuietly(dos);
+            closeQuietly(bfr);
+        }
+
+        if (failed) {
+            // Do not leave a truncated table behind for make to pick up.
+            new File(args[1]).delete();
+            System.exit(1);
+        }
+    }
+
+    /** Closes the given stream if it is non-null, reporting but not propagating errors. */
+    private static void closeQuietly(Closeable c) {
+        if (c != null) {
+            try {
+                c.close();
+            } catch (Throwable ee) { ee.printStackTrace(); }
+        }
+    }
+}
--- a/jdk/make/tools/src/build/tools/generatecharacter/CharacterScript.java
+++ b/jdk/make/tools/src/build/tools/generatecharacter/CharacterScript.java
@ -0,0 +1,214 @@
+import java.util.regex.*;
+import java.util.*;
+import java.io.*;
+
+/**
+ * Generates the code needed for j.l.C.UnicodeScript from a Scripts.txt file.
+ *
+ * The tool writes to standard output: first one line per code point with its
+ * script name, then a "Scripts" class containing the UnicodeScript enum and
+ * the two parallel lookup tables (scriptStarts / scripts).
+ */
+public class CharacterScript {
+
+    /** Test output hook; disabled. */
+    static void fortest(String fmt, Object... o) {
+        //System.out.printf(fmt, o);
+    }
+
+    /** Writes generated output to standard output. */
+    static void print(String fmt, Object... o) {
+        System.out.printf(fmt, o);
+    }
+
+    /** Debug output hook; disabled. */
+    static void debug(String fmt, Object... o) {
+        //System.out.printf(fmt, o);
+    }
+
+    /**
+     * Builds a {start, end, scriptId} triple, assigning the next free id to
+     * a script name that has not been seen before.
+     */
+    private static int[] newRange(Map<String,Integer> scriptMap,
+                                  int start, int end, String name) {
+        Integer id = scriptMap.get(name);
+        if (id == null) {
+            id = scriptMap.size();
+            scriptMap.put(name, id);
+        }
+        return new int[] { start, end, id };
+    }
+
+    /**
+     * Entry point.
+     *
+     * @param args args[0] is the path of Scripts.txt. The process exits with
+     *             a non-zero status on bad usage, on input without any script
+     *             range, or on any exception.
+     */
+    public static void main(String args[]){
+        try {
+            if (args.length != 1) {
+                System.out.println("java CharacterScript script.txt");
+                System.exit(1);
+            }
+
+            int i;
+            HashMap<String,Integer> scriptMap = new HashMap<String,Integer>();
+            ArrayList<int[]> ranges = new ArrayList<int[]>();
+
+            Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s+;\\s+(\\w+)\\s+#.*").matcher("");
+
+            int prevS = -1;
+            int prevE = -1;
+            String prevN = null;
+
+            BufferedReader sbfr = new BufferedReader(new FileReader(args[0]));
+            try {
+                String line;
+                while ((line = sbfr.readLine()) != null) {
+                    if (line.length() <= 1 || line.charAt(0) == '#') {
+                        continue;
+                    }
+                    m.reset(line);
+                    if (m.matches()) {
+                        int start = Integer.parseInt(m.group(1), 16);
+                        int end = (m.group(2)==null)?start
+                                  :Integer.parseInt(m.group(2), 16);
+                        String name = m.group(3);
+                        if (name.equals(prevN) && start == prevE + 1) {
+                            // directly adjacent range of the same script: extend
+                            prevE = end;
+                        } else {
+                            if (prevS != -1) {
+                                ranges.add(newRange(scriptMap, prevS, prevE, prevN));
+                            }
+                            debug("%x-%x\t%s%n", prevS, prevE, prevN);
+                            prevS = start; prevE = end; prevN = name;
+                        }
+                    } else {
+                        debug("Warning: Unrecognized line <%s>%n", line);
+                    }
+                }
+            } finally {
+                sbfr.close();
+            }
+
+            if (prevS == -1) {
+                // Nothing matched; there is no pending range to flush.
+                System.err.println("No script ranges found in " + args[0]);
+                System.exit(1);
+            }
+
+            //last one.
+            ranges.add(newRange(scriptMap, prevS, prevE, prevN));
+
+            debug("%x-%x\t%s%n", prevS, prevE, prevN);
+            debug("-----------------%n");
+            debug("Total scripts=%s%n", scriptMap.size());
+            debug("-----------------%n%n");
+
+            int[][] scripts = ranges.toArray(new int[ranges.size()][]);
+            int scriptSize = scripts.length;
+
+            String[] names = new String[scriptMap.size()];
+            for (String name: scriptMap.keySet()) {
+                names[scriptMap.get(name).intValue()] = name;
+            }
+
+            // per code point listing, in file order
+            for (int[] range : scripts) {
+                String name = names[range[2]].toUpperCase(Locale.ENGLISH);
+                for (int cp = range[0]; cp <= range[1]; cp++) {
+                    System.out.printf("%05X    %s%n", cp, name);
+                }
+            }
+
+            Arrays.sort(scripts,
+                        new Comparator<int[]>() {
+                            public int compare(int[] a1, int[] a2) {
+                                // code points are non-negative and small,
+                                // so the subtraction cannot overflow
+                                return a1[0] - a2[0];
+                            }
+                         });
+
+            // Consolidation: there are lots of "reserved" code points
+            // embedded in those otherwise "sequential" blocks.
+            // To make the lookup table smaller, we combine those
+            // separated segments with the assumption that the lookup
+            // implementation checks
+            //    Character.getType() !=  Character.UNASSIGNED
+            // first (return UNKNOWN for unassigned)
+
+            ArrayList<int[]> list = new ArrayList<int[]>();
+            list.add(scripts[0]);
+
+            int[] last = scripts[0];
+            for (i = 1; i < scriptSize; i++) {
+                if (scripts[i][0] != (last[1] + 1)) {
+
+                    boolean isNotUnassigned = false;
+                    for (int cp = last[1] + 1; cp < scripts[i][0]; cp++) {
+                        if (Character.getType(cp) != Character.UNASSIGNED) {
+                            isNotUnassigned = true;
+                            debug("Warning: [%x] is ASSIGNED but in NON script%n", cp);
+                            break;
+                        }
+                    }
+                    if (isNotUnassigned) {
+                        // surrogates only?
+                        int[] a = new int[3];
+                        a[0] = last[1] + 1;
+                        a[1] = scripts[i][0] - 1;
+                        a[2] = -1;  // unknown
+                        list.add(a);
+                    } else {
+                        if (last[2] == scripts[i][2]) {
+                            //combine
+                            last[1] = scripts[i][1];
+                            continue;
+                        } else {
+                            // expand last
+                            last[1] = scripts[i][0] - 1;
+                        }
+                    }
+                }
+                list.add(scripts[i]);
+                last = scripts[i];
+            }
+
+            for (i = 0; i < list.size(); i++) {
+                int[] a = list.get(i);
+                String name = "UNKNOWN";
+                if (a[2] != -1)
+                    name = names[a[2]].toUpperCase(Locale.US);
+                debug("0x%05x, 0x%05x  %s%n", a[0], a[1], name);
+            }
+            debug("--->total=%d%n", list.size());
+
+
+            //////////////////OUTPUT//////////////////////////////////
+            print("public class Scripts {%n%n");
+            print("    public static enum UnicodeScript {%n");
+            for (i = 0; i < names.length; i++) {
+                print("        /**%n         * Unicode script \"%s\".%n         */%n", names[i]);
+                print("        %s,%n%n",  names[i].toUpperCase(Locale.US));
+            }
+            print("        /**%n         * Unicode script \"Unknown\".%n         */%n        UNKNOWN;%n%n");
+
+
+            // lookup table
+            print("        private static final int[] scriptStarts = {%n");
+            for (int[] a : list) {
+                String name = "UNKNOWN";
+                if (a[2] != -1)
+                    name = names[a[2]].toUpperCase(Locale.US);
+                if (a[0] < 0x10000)
+                    print("            0x%04X,   // %04X..%04X; %s%n",
+                          a[0], a[0], a[1], name);
+                else
+                    print("            0x%05X,  // %05X..%05X; %s%n",
+                          a[0], a[0], a[1], name);
+            }
+            // everything after the last listed range is UNKNOWN
+            last = list.get(list.size() -1);
+            if (last[1] != Character.MAX_CODE_POINT)
+                print("            0x%05X   // %05X..%06X; %s%n",
+                      last[1] + 1, last[1] + 1, Character.MAX_CODE_POINT,
+                      "UNKNOWN");
+            print("%n        };%n%n");
+
+            print("        private static final UnicodeScript[] scripts = {%n");
+            for (int[] a : list) {
+                String name = "UNKNOWN";
+                if (a[2] != -1)
+                    name = names[a[2]].toUpperCase(Locale.US);
+                print("            %s,%n", name);
+            }
+
+            if (last[1] != Character.MAX_CODE_POINT)
+                print("            UNKNOWN%n");
+            print("        };%n");
+            print("    }%n");
+            print("}%n");
+
+        } catch (Exception e) {
+            e.printStackTrace();
+            // make sure a caller redirecting stdout notices the failure
+            System.exit(1);
+        }
+    }
+}
--- a/jdk/make/tools/src/build/tools/generatecharacter/GenerateCharacter.java
+++ b/jdk/make/tools/src/build/tools/generatecharacter/GenerateCharacter.java
@ -35,6 +35,8 @@ import java.io.BufferedWriter;
 import java.io.FileWriter;
 import java.io.File;

+import build.tools.generatecharacter.CharacterName;
+
 /**
 * This program generates the source code for the class java.lang.Character.
 * It also generates native C code that can perform the same operations.
--- a/jdk/src/share/classes/java/lang/Character.java
+++ b/jdk/src/share/classes/java/lang/Character.java
--- a/jdk/src/share/classes/java/lang/CharacterName.java
+++ b/jdk/src/share/classes/java/lang/CharacterName.java
@ -0,0 +1,106 @@
+/*
+ * Copyright 2010 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Sun designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Sun in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+package java.lang;
+
+import java.io.DataInputStream;
+import java.io.InputStream;
+import java.lang.ref.SoftReference;
+import java.util.Arrays;
+import java.util.zip.InflaterInputStream;
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+
+/**
+ * Lazily loaded table of Unicode character names, backed by the deflated
+ * resource "uniName.dat" that sits next to this class.
+ *
+ * The name bytes are held through a SoftReference and reloaded on demand.
+ * Each lookup entry packs (offset into the name pool << 8) | name length;
+ * an entry of 0 means the code point has no name in the table.
+ */
+class CharacterName {
+
+    private static SoftReference<byte[]> refStrPool;
+    private static int[][] lookup;
+
+    /**
+     * Loads (or reloads) the name pool and the lookup table.
+     *
+     * @return the name pool, never null
+     * @throws InternalError if the resource is missing or cannot be read
+     */
+    private static synchronized byte[] initNamePool() {
+        byte[] strPool = null;
+        if (refStrPool != null && (strPool = refStrPool.get()) != null)
+            return strPool;
+        DataInputStream dis = null;
+        try {
+            InputStream in =
+                AccessController.doPrivileged(new PrivilegedAction<InputStream>()
+                {
+                    public InputStream run() {
+                        return getClass().getResourceAsStream("uniName.dat");
+                    }
+                });
+            if (in == null)
+                throw new InternalError("uniName.dat not found");
+            dis = new DataInputStream(new InflaterInputStream(in));
+
+            // Build into a local table and publish it only once complete.
+            int[][] table = new int[(Character.MAX_CODE_POINT + 1) >> 8][];
+            int total = dis.readInt();
+            int cpEnd = dis.readInt();
+            byte ba[] = new byte[cpEnd];
+            dis.readFully(ba);
+
+            int nameOff = 0;
+            int cpOff = 0;
+            int cp = 0;
+            while (cpOff < cpEnd) {
+                int len = ba[cpOff++] & 0xff;
+                if (len == 0) {
+                    // segment start: length byte followed by a 3 byte code point
+                    len = ba[cpOff++] & 0xff;
+                    // always big-endian
+                    cp = ((ba[cpOff++] & 0xff) << 16) |
+                         ((ba[cpOff++] & 0xff) <<  8) |
+                         ((ba[cpOff++] & 0xff));
+                }  else {
+                    cp++;
+                }
+                int hi = cp >> 8;
+                if (table[hi] == null) {
+                    table[hi] = new int[0x100];
+                }
+                table[hi][cp&0xff] = (nameOff << 8) | len;
+                nameOff += len;
+            }
+            strPool = new byte[total - cpEnd];
+            dis.readFully(strPool);
+            lookup = table;
+            refStrPool = new SoftReference<byte[]>(strPool);
+        } catch (Exception x) {
+            // keep the original failure attached for diagnosis
+            InternalError e = new InternalError(x.getMessage());
+            e.initCause(x);
+            throw e;
+        } finally {
+            try {
+                if (dis != null)
+                    dis.close();
+            } catch (Exception ignored) {
+                // nothing useful can be done if closing a read-only stream fails
+            }
+        }
+        return strPool;
+    }
+
+    /**
+     * Returns the name of the given code point, or null if the table has no
+     * entry for it.
+     *
+     * @param cp a code point; this method does not range-check it, so an
+     *           out-of-range value fails with an index exception
+     */
+    public static String get(int cp) {
+        byte[] strPool = null;
+        if (refStrPool == null || (strPool = refStrPool.get()) == null)
+            strPool = initNamePool();
+        int off = 0;
+        if (lookup[cp>>8] == null ||
+            (off = lookup[cp>>8][cp&0xff]) == 0)
+            return null;
+        return new String(strPool, 0, off >>> 8, off & 0xff);  // ASCII
+    }
+}
--- a/jdk/src/share/classes/java/util/regex/Pattern.java
+++ b/jdk/src/share/classes/java/util/regex/Pattern.java
@ -29,6 +29,7 @@ import java.security.AccessController;
 import java.security.PrivilegedAction;
 import java.text.CharacterIterator;
 import java.text.Normalizer;
+import java.util.Locale;
 import java.util.Map;
 import java.util.ArrayList;
 import java.util.HashMap;
@ -200,8 +201,9 @@ import java.util.Arrays;
 *     <td>Equivalent to java.lang.Character.isMirrored()</td></tr>
 *
 * <tr><th>&nbsp;</th></tr>
- * <tr align="left"><th colspan="2" id="unicode">Classes for Unicode blocks and categories</th></tr>
- *
+ * <tr align="left"><th colspan="2" id="unicode">Classes for Unicode scripts, blocks and categories</th></tr>
+ * <tr><td valign="top" headers="construct unicode"><tt>\p{IsLatin}</tt></td>
+ *     <td headers="matches">A Latin&nbsp;script character (simple <a href="#ubc">script</a>)</td></tr>
 * <tr><td valign="top" headers="construct unicode"><tt>\p{InGreek}</tt></td>
 *     <td headers="matches">A character in the Greek&nbsp;block (simple <a href="#ubc">block</a>)</td></tr>
 * <tr><td valign="top" headers="construct unicode"><tt>\p{Lu}</tt></td>
@ -527,25 +529,40 @@ import java.util.Arrays;
 * while not equal, compile into the same pattern, which matches the character
 * with hexadecimal value <tt>0x2014</tt>.
 *
- * <a name="ubc"> <p>Unicode blocks and categories are written with the
- * <tt>\p</tt> and <tt>\P</tt> constructs as in
- * Perl. <tt>\p{</tt><i>prop</i><tt>}</tt> matches if the input has the
- * property <i>prop</i>, while <tt>\P{</tt><i>prop</i><tt>}</tt> does not match if
- * the input has that property.  Blocks are specified with the prefix
- * <tt>In</tt>, as in <tt>InMongolian</tt>.  Categories may be specified with
- * the optional prefix <tt>Is</tt>: Both <tt>\p{L}</tt> and <tt>\p{IsL}</tt>
- * denote the category of Unicode letters.  Blocks and categories can be used
- * both inside and outside of a character class.
- *
+ * <a name="ubc">
+ * <p>Unicode scripts, blocks and categories are written with the <tt>\p</tt> and
+ * <tt>\P</tt> constructs as in Perl. <tt>\p{</tt><i>prop</i><tt>}</tt> matches if
+ * the input has the property <i>prop</i>, while <tt>\P{</tt><i>prop</i><tt>}</tt>
+ * does not match if the input has that property.
+ * <p>
+ * Scripts are specified either with the prefix {@code Is}, as in
+ * {@code IsHiragana}, or by using the {@code script} keyword (or its short
+ * form {@code sc}) as in {@code script=Hiragana} or {@code sc=Hiragana}.
+ * <p>
+ * Blocks are specified with the prefix {@code In}, as in
+ * {@code InMongolian}, or by using the keyword {@code block} (or its short
+ * form {@code blk}) as in {@code block=Mongolian} or {@code blk=Mongolian}.
+ * <p>
+ * Categories may be specified with the optional prefix {@code Is}:
+ * Both {@code \p{L}} and {@code \p{IsL}} denote the category of Unicode
+ * letters. Same as scripts and blocks, categories can also be specified
+ * by using the keyword {@code general_category} (or its short form
+ * {@code gc}) as in {@code general_category=Lu} or {@code gc=Lu}.
+ * <p>
+ * Scripts, blocks and categories can be used both inside and outside of a
+ * character class.
 * <p> The supported categories are those of
 * <a href="http://www.unicode.org/unicode/standard/standard.html">
 * <i>The Unicode Standard</i></a> in the version specified by the
 * {@link java.lang.Character Character} class. The category names are those
 * defined in the Standard, both normative and informative.
+ * The script names supported by <code>Pattern</code> are the valid script names
+ * accepted and defined by
+ * {@link java.lang.Character.UnicodeScript#forName(String) UnicodeScript.forName}.
 * The block names supported by <code>Pattern</code> are the valid block names
 * accepted and defined by
 * {@link java.lang.Character.UnicodeBlock#forName(String) UnicodeBlock.forName}.
- *
+ * <p>
 * <a name="jcc"> <p>Categories that behave like the java.lang.Character
 * boolean is<i>methodname</i> methods (except for the deprecated ones) are
 * available through the same <tt>\p{</tt><i>prop</i><tt>}</tt> syntax where
@ -2488,12 +2505,34 @@ loop:   for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
            name = new String(temp, i, j-i-1);
        }

-        if (name.startsWith("In")) {
-            node = unicodeBlockPropertyFor(name.substring(2));
+        int i = name.indexOf('=');
+        if (i != -1) {
+            // property construct \p{name=value}
+            String value = name.substring(i + 1);
+            name = name.substring(0, i).toLowerCase(Locale.ENGLISH);
+            if ("sc".equals(name) || "script".equals(name)) {
+                node = unicodeScriptPropertyFor(value);
+            } else if ("blk".equals(name) || "block".equals(name)) {
+                node = unicodeBlockPropertyFor(value);
+            } else if ("gc".equals(name) || "general_category".equals(name)) {
+                node = charPropertyNodeFor(value);
+            } else {
+                throw error("Unknown Unicode property {name=<" + name + ">, "
+                             + "value=<" + value + ">}");
+            }
        } else {
-            if (name.startsWith("Is"))
+            if (name.startsWith("In")) {
+                // \p{inBlockName}
+                node = unicodeBlockPropertyFor(name.substring(2));
+            } else if (name.startsWith("Is")) {
+                // \p{isGeneralCategory} and \p{isScriptName}
                name = name.substring(2);
-            node = charPropertyNodeFor(name);
+                node = CharPropertyNames.charPropertyFor(name);
+                if (node == null)
+                    node = unicodeScriptPropertyFor(name);
+            } else {
+                node = charPropertyNodeFor(name);
+            }
        }
        if (maybeComplement) {
            if (node instanceof Category || node instanceof Block)
@ -2503,6 +2542,21 @@ loop:   for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
        return node;
    }

+
+    /**
+     * Returns a CharProperty matching all characters belonging to
+     * the UnicodeScript with the given name; an unrecognized name is
+     * reported as a pattern error.
+     */
+    private CharProperty unicodeScriptPropertyFor(String name) {
+        try {
+            return new Script(Character.UnicodeScript.forName(name));
+        } catch (IllegalArgumentException iae) {
+            throw error("Unknown character script name {" + name + "}");
+        }
+    }
+
    /**
     * Returns a CharProperty matching all characters in a UnicodeBlock.
     */
@ -3566,6 +3620,19 @@ loop:   for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
        }
    }

+    /**
+     * Node class that matches any character whose Unicode script is
+     * the one given at construction time.
+     */
+    static final class Script extends CharProperty {
+        final Character.UnicodeScript script;
+
+        Script(Character.UnicodeScript wanted) {
+            script = wanted;
+        }
+
+        boolean isSatisfiedBy(int ch) {
+            Character.UnicodeScript actual = Character.UnicodeScript.of(ch);
+            return actual == script;
+        }
+    }
+
    /**
     * Node class that matches a Unicode category.
     */
--- a/jdk/test/java/lang/Character/CheckScript.java
+++ b/jdk/test/java/lang/Character/CheckScript.java
@ -0,0 +1,105 @@
+/**
+ * @test
+ * @bug 6945564
+ * @summary Check that j.l.Character.UnicodeScript agrees with the ranges in Scripts.txt
+ * @ignore don't run until #6903266 is integrated
+ */
+
+import java.io.*;
+import java.lang.reflect.*;
+import java.util.*;
+import java.util.regex.*;
+import java.lang.Character.UnicodeScript;
+
+/**
+ * Cross-checks Character.UnicodeScript.of() against the ranges listed in a
+ * Scripts.txt file, in both directions: every listed code point must map to
+ * its listed script, and every code point that maps to a script must be
+ * listed under that script.
+ */
+public class CheckScript {
+
+    /**
+     * @param args args[0] is the path of Scripts.txt
+     * @throws RuntimeException on the first mismatch found
+     */
+    public static void main(String[] args) throws Exception {
+
+        if (args.length != 1) {
+            System.out.println("java CheckScript script.txt");
+            System.exit(1);
+        }
+        Map<String, List<Integer>> scripts = readRanges(args[0]);
+
+        // check all defined ranges
+        for (Map.Entry<String, List<Integer>> e : scripts.entrySet()) {
+            String name = e.getKey();
+            System.out.println("Checking " + name + "...");
+            List<Integer> ranges = e.getValue();
+            Character.UnicodeScript expected =
+                Character.UnicodeScript.forName(name);
+
+            for (int off = 0; off < ranges.size(); off += 2) {
+                int start = ranges.get(off);
+                int end = ranges.get(off + 1);
+                for (int cp = start; cp <= end; cp++) {
+                    Character.UnicodeScript script =
+                        Character.UnicodeScript.of(cp);
+                    if (script != expected) {
+                        throw new RuntimeException(
+                            "UnicodeScript failed: cp=" +
+                            Integer.toHexString(cp) +
+                            ", of(cp)=<" + script + "> but <" +
+                            expected + "> is expected");
+                    }
+                }
+            }
+        }
+
+        // check all codepoints, including the last one
+        for (int cp = 0; cp <= Character.MAX_CODE_POINT; cp++) {
+            Character.UnicodeScript script = Character.UnicodeScript.of(cp);
+            if (script == Character.UnicodeScript.UNKNOWN) {
+                // UNKNOWN is only acceptable for code points without a script
+                int type = Character.getType(cp);
+                if (type != Character.UNASSIGNED &&
+                    type != Character.SURROGATE &&
+                    type != Character.PRIVATE_USE)
+                    throw new RuntimeException(
+                        "UnicodeScript failed: cp=" +
+                        Integer.toHexString(cp) +
+                        ", of(cp)=<" + script +
+                        "> but the code point is assigned (type=" +
+                        type + ")");
+            } else {
+                List<Integer> ranges =
+                    scripts.get(script.name().toLowerCase(Locale.ENGLISH));
+                if (ranges == null || !contains(ranges, cp)) {
+                    throw new RuntimeException(
+                        "UnicodeScript failed: cp=" +
+                        Integer.toHexString(cp) +
+                        ", of(cp)=<" + script +
+                        "> but NOT in ranges of this script");
+                }
+            }
+        }
+    }
+
+    /**
+     * Parses the file into a map from lower-cased script name to a flat list
+     * of range bounds: start0, end0, start1, end1, ...
+     */
+    private static Map<String, List<Integer>> readRanges(String file)
+        throws IOException
+    {
+        Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s+;\\s+(\\w+)\\s+#.*").matcher("");
+        Map<String, List<Integer>> scripts = new HashMap<>();
+        try (BufferedReader sbfr = new BufferedReader(new FileReader(file))) {
+            String line;
+            while ((line = sbfr.readLine()) != null) {
+                if (line.length() <= 1 || line.charAt(0) == '#') {
+                    continue;
+                }
+                m.reset(line);
+                if (m.matches()) {
+                    int start = Integer.parseInt(m.group(1), 16);
+                    int end = (m.group(2)==null)?start
+                                                :Integer.parseInt(m.group(2), 16);
+                    String name = m.group(3).toLowerCase(Locale.ENGLISH);
+                    List<Integer> ranges = scripts.get(name);
+                    if (ranges == null) {
+                        ranges = new ArrayList<Integer>();
+                        scripts.put(name, ranges);
+                    }
+                    ranges.add(start);
+                    ranges.add(end);
+                }
+            }
+        }
+        return scripts;
+    }
+
+    /** Returns true if cp lies inside any of the (start, end) pairs in ranges. */
+    private static boolean contains(List<Integer> ranges, int cp) {
+        for (int off = 0; off < ranges.size(); off += 2) {
+            if (cp >= ranges.get(off) && cp <= ranges.get(off + 1))
+                return true;
+        }
+        return false;
+    }
+}
--- a/jdk/test/java/lang/Character/Scripts.txt
+++ b/jdk/test/java/lang/Character/Scripts.txt
--- a/jdk/test/java/util/regex/RegExTest.java
+++ b/jdk/test/java/util/regex/RegExTest.java
@ -32,7 +32,7 @@
 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
- * 6350801 6676425 6878475 6919132 6931676
+ * 6350801 6676425 6878475 6919132 6931676 6948903
 */

 import java.util.regex.*;
@ -135,7 +135,7 @@ public class RegExTest {
        surrogatesInClassTest();
        namedGroupCaptureTest();
        nonBmpClassComplementTest();
-
+        unicodePropertiesTest();
        if (failure)
            throw new RuntimeException("Failure in the RE handling.");
        else
@ -3515,7 +3515,7 @@ public class RegExTest {
        report("NamedGroupCapture");
    }

-    // This is for bug 6919132
+    // This is for bug 6919132
    private static void nonBmpClassComplementTest() throws Exception {
        Pattern p = Pattern.compile("\\P{Lu}");
        Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
@ -3539,4 +3539,79 @@ public class RegExTest {
        report("NonBmpClassComplement");
    }

+    /**
+     * Exercises the script, block and general category property syntax:
+     * first every accepted spelling once, then the script and block of each
+     * sampled code point against a pattern built from its own name and
+     * against one pattern that must not match.
+     */
+    private static void unicodePropertiesTest() throws Exception {
+        // different forms; evaluated in order, stopping at the first failure
+        String[][] forms = {
+            { "\\p{IsLu}",                 "A" },
+            { "\\p{Lu}",                   "A" },
+            { "\\p{gc=Lu}",                "A" },
+            { "\\p{general_category=Lu}",  "A" },
+            { "\\p{IsLatin}",              "B" },
+            { "\\p{sc=Latin}",             "B" },
+            { "\\p{script=Latin}",         "B" },
+            { "\\p{InBasicLatin}",         "c" },
+            { "\\p{blk=BasicLatin}",       "c" },
+            { "\\p{block=BasicLatin}",     "c" },
+        };
+        for (String[] form : forms) {
+            if (!Pattern.compile(form[0]).matcher(form[1]).matches()) {
+                failCount++;
+                break;
+            }
+        }
+
+        Matcher common  = Pattern.compile("\\p{script=Common}").matcher("");
+        Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
+        Matcher prevScriptMatcher = common;
+        Character.UnicodeScript prevScript = Character.UnicodeScript.of(0);
+
+        Matcher latin  = Pattern.compile("\\p{block=basic_latin}").matcher("");
+        Matcher greek  = Pattern.compile("\\p{InGreek}").matcher("");
+        Matcher prevBlockMatcher = latin;
+        Character.UnicodeBlock prevBlock = Character.UnicodeBlock.of(0);
+
+        for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
+            if (cp >= 0x30000 && (cp & 0x70) == 0){
+                continue;  // only pick couple code points, they are the same
+            }
+            String str = new String(Character.toChars(cp));
+
+            // Unicode Script: reuse the previous matcher while the script
+            // stays the same, compile a new one when it changes
+            Character.UnicodeScript script = Character.UnicodeScript.of(cp);
+            Matcher scriptMatcher;
+            if (script != prevScript) {
+                scriptMatcher =
+                    Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
+            } else {
+                scriptMatcher = prevScriptMatcher.reset(str);
+            }
+            if (!scriptMatcher.matches()) {
+                failCount++;
+            }
+            Matcher wrongScript = common;
+            if (script == Character.UnicodeScript.COMMON) {
+                wrongScript = unknown;
+            }
+            if (wrongScript.reset(str).matches()) {
+                failCount++;
+            }
+            prevScriptMatcher = scriptMatcher;
+            prevScript = script;
+
+            // Unicode Block: same scheme, skipping code points without a block
+            Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
+            if (block == null) {
+                //System.out.printf("Not a Block: cp=%x%n", cp);
+                continue;
+            }
+            Matcher blockMatcher;
+            if (block != prevBlock) {
+                blockMatcher =
+                    Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
+            } else {
+                blockMatcher = prevBlockMatcher.reset(str);
+            }
+            if (!blockMatcher.matches()) {
+                failCount++;
+            }
+            Matcher wrongBlock = latin;
+            if (block == Character.UnicodeBlock.BASIC_LATIN) {
+                wrongBlock = greek;
+            }
+            if (wrongBlock.reset(str).matches()) {
+                failCount++;
+            }
+            prevBlockMatcher = blockMatcher;
+            prevBlock = block;
+        }
+        report("unicodeProperties");
+    }
 }