8365675: Add String Unicode Case-Folding Support

Reviewed-by: rriggs, naoto, ihse
2026-01-28 03:58:21 +00:00 · 2025-12-02 19:47:18 +00:00 · 2025-12-02 19:47:18 +00:00 · b97ed667db
commit b97ed667db
parent 618732ffc0
13 changed files with 1245 additions and 212 deletions
--- a/make/ToolsJdk.gmk
+++ b/make/ToolsJdk.gmk
@ -79,7 +79,7 @@ TOOL_GENERATEEXTRAPROPERTIES = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_too
    build.tools.generateextraproperties.GenerateExtraProperties
 TOOL_GENERATECASEFOLDING = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
-    build.tools.generatecharacter.CaseFolding
+    build.tools.generatecharacter.GenerateCaseFolding
 TOOL_MAKEZIPREPRODUCIBLE = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
    build.tools.makezipreproducible.MakeZipReproducible
--- a/make/jdk/src/classes/build/tools/generatecharacter/CaseFolding.java
+++ b/make/jdk/src/classes/build/tools/generatecharacter/CaseFolding.java
@ -1,73 +0,0 @@
 /*
 * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
 package build.tools.generatecharacter;
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Paths;
 import java.nio.file.StandardOpenOption;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 /**
  * Build tool that generates a {@code CaseFolding.java} source file by
  * substituting the {@code %%%Entries} placeholder of a template with
  * {@code entry(0x..., 0x...)} lines derived from the Unicode
  * {@code CaseFolding.txt} data file.
  */
 public class CaseFolding {
    /**
     * Entry point.
     *
     * @param args exactly three paths: the template file, the
     *             {@code CaseFolding.txt} data file and the file to generate
     * @throws Throwable if any of the files cannot be read or written
     */
    public static void main(String[] args) throws Throwable {
        if (args.length != 3) {
            System.err.println("Usage: java CaseFolding TemplateFile CaseFolding.txt CaseFolding.java");
            System.exit(1);
        }
        var templateFile = Paths.get(args[0]);
        var caseFoldingTxt = Paths.get(args[1]);
        var genSrcFile = Paths.get(args[2]);

        var caseFoldingEntries = readCaseFoldingEntries(caseFoldingTxt);

        // hack, hack, hack! the logic does not pick 0131. just add manually to support 'I's.
        // 0049; T; 0131; # LATIN CAPITAL LETTER I
        final String T_0x0131_0x49 = String.format("        entry(0x%04x, 0x%04x),\n", 0x0131, 0x49);

        // Generate .java file
        Files.write(
            genSrcFile,
            expandTemplate(templateFile, T_0x0131_0x49 + caseFoldingEntries),
            StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
    }

    /**
     * Reads the data file and renders every C, T or S mapping whose folded
     * code point does not map back to the original through
     * {@link Character#toUpperCase(int)} or {@link Character#toLowerCase(int)}
     * as an {@code entry(...)} line, joined by {@code ",\n"}.
     */
    private static String readCaseFoldingEntries(java.nio.file.Path caseFoldingTxt) throws IOException {
        var supportedTypes = "^.*; [CTS]; .*$";
        // Files.lines keeps the file open until the stream is closed.
        try (Stream<String> lines = Files.lines(caseFoldingTxt)) {
            return lines
                .filter(line -> !line.startsWith("#") && line.matches(supportedTypes))
                .map(line -> {
                    String[] cols = line.split("; ");
                    return new String[] {cols[0], cols[1], cols[2]};
                })
                .filter(cols -> {
                    //  the folding case doesn't map back to the original char.
                    var cp1 = Integer.parseInt(cols[0], 16);
                    var cp2 = Integer.parseInt(cols[2], 16);
                    return Character.toUpperCase(cp2) != cp1 && Character.toLowerCase(cp2) != cp1;
                })
                .map(cols -> String.format("        entry(0x%s, 0x%s)", cols[0], cols[2]))
                .collect(Collectors.joining(",\n", "", ""));
        }
    }

    /**
     * Returns the template lines with every line containing
     * {@code %%%Entries} replaced by {@code entries}. The template is fully
     * read and closed before this method returns.
     */
    private static java.util.List<String> expandTemplate(java.nio.file.Path templateFile, String entries)
            throws IOException {
        try (Stream<String> lines = Files.lines(templateFile)) {
            return lines
                .map(line -> line.contains("%%%Entries") ? entries : line)
                .collect(Collectors.toList());
        }
    }
 }
--- a/make/jdk/src/classes/build/tools/generatecharacter/GenerateCaseFolding.java
+++ b/make/jdk/src/classes/build/tools/generatecharacter/GenerateCaseFolding.java
@ -0,0 +1,134 @@
 /*
 * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
 package build.tools.generatecharacter;
 import java.nio.file.Files;
 import java.nio.file.Paths;
 import java.nio.file.StandardOpenOption;
 import java.util.Arrays;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
 /**
  * Build tool that expands a {@code CaseFolding.java} template using the
  * Unicode {@code CaseFolding.txt} data file.
  * <p>
  * Two placeholders are replaced:
  * <ul>
  *   <li>{@code %%%Entries}: two parallel arrays, {@code CASE_FOLDING_CPS}
  *       (the source code points) and {@code CASE_FOLDING_VALUES} (the packed
  *       foldings), built from the C and F mappings;</li>
  *   <li>{@code %%%Expanded_Case_Map_Entries}: {@code entry(0x..., 0x...)}
  *       lines built from the C, T and S mappings whose folded code point
  *       does not map back to the original.</li>
  * </ul>
  */
 public class GenerateCaseFolding {

    // A packed value has three 16-bit slots (bits 0-15, 16-31, 32-47) below
    // the length field that starts at bit 48.
    private static final int MAX_FOLDING_LENGTH = 3;

    /**
     * Entry point.
     *
     * @param args exactly three paths: the template file, the
     *             {@code CaseFolding.txt} data file and the file to generate
     * @throws Throwable if any of the files cannot be read or written
     * @throws IllegalArgumentException if a full case folding is longer than
     *         the packed representation supports
     */
    public static void main(String[] args) throws Throwable {
        if (args.length != 3) {
            System.err.println("Usage: java GenerateCaseFolding TemplateFile CaseFolding.txt CaseFolding.java");
            System.exit(1);
        }
        var templateFile = Paths.get(args[0]);
        var caseFoldingTxt = Paths.get(args[1]);
        var genSrcFile = Paths.get(args[2]);

        // java.lang
        var foldingEntries = genFoldingEntries(readCaseFoldings(caseFoldingTxt));

        // util.regex
        // hack, hack, hack! the logic does not pick 0131. just add manually to support 'I's.
        // 0049; T; 0131; # LATIN CAPITAL LETTER I
        final String T_0x0131_0x49 = String.format("        entry(0x%04x, 0x%04x),\n", 0x0131, 0x49);
        var expandedEntries = T_0x0131_0x49 + readExpandedCaseFoldingEntries(caseFoldingTxt);

        Files.write(
                genSrcFile,
                expandTemplate(templateFile, foldingEntries, expandedEntries),
                StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
    }

    /**
     * Reads the C and F (full/1:M) mappings. Each result row holds the source
     * code point followed by its folded code points, all as hex strings.
     */
    private static String[][] readCaseFoldings(java.nio.file.Path caseFoldingTxt) throws java.io.IOException {
        var supportedTypes = "^.*; [CF]; .*$";  // full/1:M case folding
        // Files.lines keeps the file open until the stream is closed.
        try (var lines = Files.lines(caseFoldingTxt)) {
            return lines
                    .filter(line -> !line.startsWith("#") && line.matches(supportedTypes))
                    .map(line -> {
                        var fields = line.split("; ");
                        var cp = fields[0];
                        fields = fields[2].trim().split(" ");
                        var folding = new String[fields.length + 1];
                        folding[0] = cp;
                        System.arraycopy(fields, 0, folding, 1, fields.length);
                        return folding;
                    })
                    .toArray(size -> new String[size][]);
        }
    }

    /**
     * Reads the C, T and S mappings and renders those whose folded code point
     * does not map back to the original through
     * {@link Character#toUpperCase(int)} or {@link Character#toLowerCase(int)}
     * as {@code entry(...)} lines joined by {@code ",\n"}.
     */
    private static String readExpandedCaseFoldingEntries(java.nio.file.Path caseFoldingTxt)
            throws java.io.IOException {
        var expandedSupportedTypes = "^.*; [CTS]; .*$";
        try (var lines = Files.lines(caseFoldingTxt)) {
            return lines
                    .filter(line -> !line.startsWith("#") && line.matches(expandedSupportedTypes))
                    .map(line -> {
                        String[] cols = line.split("; ");
                        return new String[]{cols[0], cols[1], cols[2]};
                    })
                    .filter(cols -> {
                        // the folding case doesn't map back to the original char.
                        var cp1 = Integer.parseInt(cols[0], 16);
                        var cp2 = Integer.parseInt(cols[2], 16);
                        return Character.toUpperCase(cp2) != cp1 && Character.toLowerCase(cp2) != cp1;
                    })
                    .map(cols -> String.format("        entry(0x%s, 0x%s)", cols[0], cols[2]))
                    .collect(Collectors.joining(",\n", "", ""));
        }
    }

    /**
     * Returns the template lines with both placeholders substituted. The
     * template is fully read and closed before this method returns.
     */
    private static java.util.List<String> expandTemplate(java.nio.file.Path templateFile,
                                                         String foldingEntries,
                                                         String expandedEntries) throws java.io.IOException {
        try (var lines = Files.lines(templateFile)) {
            return lines
                    .map(line -> line.contains("%%%Entries") ? foldingEntries : line)
                    .map(line -> line.contains("%%%Expanded_Case_Map_Entries") ? expandedEntries : line)
                    .collect(Collectors.toList());
        }
    }

    /**
     * Packs one folding row into a {@code long}.
     * <p>
     * A 1:1 folding, and any folding of a supplementary source code point, is
     * stored as the first folded code point only. A 1:M folding of a BMP code
     * point stores its folded values in consecutive 16-bit slots starting at
     * bit 0, with the number of folded values at bit 48 and above.
     *
     * @param folding the source code point followed by its folded code points
     * @return the packed value
     * @throws IllegalArgumentException if a 1:M folding has more than
     *         {@value #MAX_FOLDING_LENGTH} code points
     */
    private static long foldingToLong(String[] folding) {
        int cp = Integer.parseInt(folding[0], 16);
        long value = (long)Integer.parseInt(folding[1], 16);
        int count = folding.length - 1;
        if (!Character.isSupplementaryCodePoint(cp) && count != 1) {
            if (count > MAX_FOLDING_LENGTH) {
                // A 4th slot would collide with the length field; fail the
                // build instead of emitting a corrupt table.
                throw new IllegalArgumentException(
                        "Case folding of U+" + folding[0] + " has " + count
                        + " code points; at most " + MAX_FOLDING_LENGTH + " are supported");
            }
            var shift = 16;
            for (int j = 2; j < folding.length; j++) {
                value |= (long)Integer.parseInt(folding[j], 16) << shift;
                shift += 16;  // next 16-bit slot
            }
            value |= (long) count << 48;
        }
        return value;
    }

    /**
     * Renders the {@code CASE_FOLDING_CPS} and {@code CASE_FOLDING_VALUES}
     * array declarations; element {@code i} of both arrays belongs to
     * {@code foldings[i]}.
     */
    private static String genFoldingEntries(String[][] foldings) {
        StringBuilder sb = new StringBuilder();
        sb.append("    private static final int[] CASE_FOLDING_CPS = {\n");
        int width = 10;  // code points per generated line
        for (int i = 0; i < foldings.length; i++) {
            if (i % width == 0) {
                sb.append("        ");
            }
            sb.append(String.format("0X%s", foldings[i][0]));
            if (i < foldings.length - 1) {
                sb.append(", ");
            }
            if (i % width == width - 1 || i == foldings.length - 1) {
                sb.append("\n");
            }
        }
        sb.append("    };\n\n");
        sb.append("    private static final long[] CASE_FOLDING_VALUES = {\n");
        width = 6;  // packed values per generated line
        for (int i = 0; i < foldings.length; i++) {
            if (i % width == 0) {
                sb.append("        "); // indent
            }
            sb.append(String.format("0x%013xL", foldingToLong(foldings[i])));
            if (i < foldings.length - 1) {
                sb.append(", ");
            }
            if (i % width == width - 1 || i == foldings.length - 1) {
                sb.append("\n");
            }
        }
        sb.append("    };\n");
        return sb.toString();
    }
 }
--- a/make/modules/java.base/gensrc/GensrcCharacterData.gmk
+++ b/make/modules/java.base/gensrc/GensrcCharacterData.gmk
@ -72,5 +72,22 @@ TARGETS += $(GENSRC_CHARACTERDATA)
 ################################################################################
 # Generate jdk/internal/lang/CaseFolding.java by running
 # $(TOOL_GENERATECASEFOLDING) on the CaseFolding.java.template file and the
 # Unicode CaseFolding.txt data file. The output is regenerated whenever the
 # build tools, the template or the data file change.
 GENSRC_STRINGCASEFOLDING := $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/lang/CaseFolding.java
 STRINGCASEFOLDING_TEMPLATE := $(MODULE_SRC)/share/classes/jdk/internal/lang/CaseFolding.java.template
 CASEFOLDINGTXT := $(MODULE_SRC)/share/data/unicodedata/CaseFolding.txt
 $(GENSRC_STRINGCASEFOLDING): $(BUILD_TOOLS_JDK) $(STRINGCASEFOLDING_TEMPLATE) $(CASEFOLDINGTXT)
 	$(call LogInfo, Generating $@)
 	$(call MakeTargetDir)
 	$(TOOL_GENERATECASEFOLDING) \
 	    $(STRINGCASEFOLDING_TEMPLATE) \
 	    $(CASEFOLDINGTXT) \
 	    $(GENSRC_STRINGCASEFOLDING)
 TARGETS += $(GENSRC_STRINGCASEFOLDING)
 endif # include guard
 include MakeIncludeEnd.gmk
--- a/make/modules/java.base/gensrc/GensrcRegex.gmk
+++ b/make/modules/java.base/gensrc/GensrcRegex.gmk
@ -50,22 +50,5 @@ TARGETS += $(GENSRC_INDICCONJUNCTBREAK)
 ################################################################################
 # Generate jdk/internal/util/regex/CaseFolding.java by running
 # $(TOOL_GENERATECASEFOLDING) on its CaseFolding.java.template file and the
 # Unicode CaseFolding.txt data file.
 GENSRC_CASEFOLDING := $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/util/regex/CaseFolding.java
 CASEFOLDINGTEMP := $(MODULE_SRC)/share/classes/jdk/internal/util/regex/CaseFolding.java.template
 CASEFOLDINGTXT := $(MODULE_SRC)/share/data/unicodedata/CaseFolding.txt
 $(GENSRC_CASEFOLDING): $(BUILD_TOOLS_JDK) $(CASEFOLDINGTEMP) $(CASEFOLDINGTXT)
 	$(call LogInfo, Generating $@)
 	$(call MakeTargetDir)
 	$(TOOL_GENERATECASEFOLDING) \
 	    $(CASEFOLDINGTEMP) \
 	    $(CASEFOLDINGTXT) \
 	    $(GENSRC_CASEFOLDING)
 TARGETS += $(GENSRC_CASEFOLDING)
 ################################################################################
 endif # include guard
 include MakeIncludeEnd.gmk
--- a/src/java.base/share/classes/java/lang/String.java
+++ b/src/java.base/share/classes/java/lang/String.java
@ -117,9 +117,38 @@ import sun.nio.cs.UTF_8;
 * Unicode code points (i.e., characters), in addition to those for
 * dealing with Unicode code units (i.e., {@code char} values).
 *
- * <p>Unless otherwise noted, methods for comparing Strings do not take locale
+ * <p><b>String comparison and case-insensitive matching</b>
- * into account.  The {@link java.text.Collator} class provides methods for
+ *
- * finer-grain, locale-sensitive String comparison.
+ * <p>There are several related ways to compare {@code String} values; choose
 * the one whose semantics fit your purpose:
 *
 * <ul>
 *   <li><b>Exact content equality</b> — {@link #equals(Object)} checks that two
 *       strings contain the identical char sequence of UTF-16 code units. This is
 *       a strict, case-sensitive comparison suitable for exact matching, hashing
 *       and any situation that requires bit-for-bit stability.</li>
 *
 *   <li><b>Simple case-insensitive equality</b> — {@link #equalsIgnoreCase(String)}
 *       (and the corresponding {@link #compareToIgnoreCase(String)} and {@link #CASE_INSENSITIVE_ORDER})
 *       performs a per-code-point, locale-independent comparison using
 *       {@link Character#toUpperCase(int)} and {@link Character#toLowerCase(int)}.
 *       It is convenient for many common case-insensitive checks.</li>
 *
 *   <li><b>Unicode case-folded equivalence</b> — {@link #equalsFoldCase(String)}
 *       (and the corresponding {@link #compareToFoldCase(String)} and {@link #UNICODE_CASEFOLD_ORDER})
 *       implement the Unicode <em>{@index "full case folding"}</em> rules defined in
 *       <a href="https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt">Unicode CaseFolding.txt</a>.
 *       Case folding is locale-independent and language-neutral and may map a single code
 *       point to multiple code points (1:M mappings). For example, the German sharp
 *       s ({@code U+00DF}) is folded to the sequence {@code "ss"}.
 *       Use these methods when you need Unicode-compliant
 *       <a href="https://www.unicode.org/versions/latest/core-spec/chapter-5/#G21790">
 *       caseless matching</a>, searching, or ordering.</li>
 * </ul>
 *
 * <p>Unless otherwise noted, methods for comparing Strings do not take locale into
 * account. The {@link java.text.Collator} class provides methods for finer-grain,
 * locale-sensitive String comparison.
 *
 * @implNote The implementation of the string concatenation operator is left to
 * the discretion of a Java compiler, as long as the compiler ultimately conforms
@ -2179,6 +2208,7 @@ public final class String
     *          false} otherwise
     *
     * @see  #equals(Object)
     * @see  #equalsFoldCase(String)
     * @see  #codePoints()
     */
    public boolean equalsIgnoreCase(String anotherString) {
@ -2188,6 +2218,57 @@ public final class String
                && regionMatches(true, 0, anotherString, 0, length());
    }
    /**
     * Compares this {@code String} to another {@code String} for equality,
     * using <em>{@index "Unicode case folding"}</em>. Two strings are considered equal
     * by this method if their case-folded forms are identical.
     * <p>
     * Case folding is defined by the Unicode Standard in
     * <a href="https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt">CaseFolding.txt</a>,
     * including 1:M mappings. For example, {@code "Fuß".equalsFoldCase("FUSS")}
     * returns {@code true}, since the character {@code U+00DF} (sharp s) folds
     * to {@code "ss"}.
     * <p>
     * Case folding is locale-independent and language-neutral, unlike
     * locale-sensitive transformations such as {@link #toLowerCase()} or
     * {@link #toUpperCase()}. It is intended for caseless matching,
     * searching, and indexing.
     *
     * @apiNote
     * This method is the Unicode-compliant alternative to
     * {@link #equalsIgnoreCase(String)}. It implements full case folding as
     * defined by the Unicode Standard, which may differ from the simpler
     * per-character mapping performed by {@code equalsIgnoreCase}.
     * For example:
     * {@snippet lang=java :
     * String a = "Fuß";
     * String b = "FUSS";
     * boolean equalsFoldCase = a.equalsFoldCase(b);       // returns true
     * boolean equalsIgnoreCase = a.equalsIgnoreCase(b);   // returns false
     * }
     *
     * @param  anotherString
     *         The {@code String} to compare this {@code String} against
     *
     * @return  {@code true} if the given object is not {@code null} and represents
     *          the same sequence of characters as this string under Unicode case
     *          folding; {@code false} otherwise.
     *
     * @spec    https://www.unicode.org/versions/latest/core-spec/chapter-5/#G21790 Unicode Caseless Matching
     * @see     #compareToFoldCase(String)
     * @see     #equalsIgnoreCase(String)
     * @since   26
     */
    public boolean equalsFoldCase(String anotherString) {
        // The same reference is trivially equal and null never matches;
        // everything else is decided by the case-folding comparator.
        return this == anotherString
                || (anotherString != null
                        && UNICODE_CASEFOLD_ORDER.compare(this, anotherString) == 0);
    }
    /**
     * Compares two strings lexicographically.
     * The comparison is based on the Unicode value of each character in
@ -2303,12 +2384,86 @@ public final class String
     *          than this String, ignoring case considerations.
     * @see     java.text.Collator
     * @see     #codePoints()
     * @see     #compareToFoldCase(String)
     * @since   1.2
     */
    public int compareToIgnoreCase(String str) {
        // Delegates to the shared CASE_INSENSITIVE_ORDER comparator.
        return CASE_INSENSITIVE_ORDER.compare(this, str);
    }
    /**
     * A Comparator that orders {@code String} objects as by
     * {@link #compareToFoldCase(String) compareToFoldCase()}.
     *
     * @see     #compareToFoldCase(String)
     * @since   26
     */
    public static final Comparator<String> UNICODE_CASEFOLD_ORDER
            = new FoldCaseComparator();
    /**
     * Comparator backing {@code UNICODE_CASEFOLD_ORDER}. It selects the
     * comparison routine that matches the coders of the two strings.
     */
    private static class FoldCaseComparator implements Comparator<String> {
        @Override
        public int compare(String s1, String s2) {
            final byte[] left = s1.value;
            final byte[] right = s2.value;
            final boolean leftIsLatin1 = s1.coder == LATIN1;
            if (s1.coder != s2.coder()) {
                // Mixed coders: dispatch on the coder of the left operand.
                return leftIsLatin1 ? StringLatin1.compareToFC_UTF16(left, right)
                                    : StringUTF16.compareToFC_Latin1(left, right);
            }
            // Same coder on both sides.
            return leftIsLatin1 ? StringLatin1.compareToFC(left, right)
                                : StringUTF16.compareToFC(left, right);
        }
    }
    /**
     * Compares two strings lexicographically using <em>{@index "Unicode case folding"}</em>.
     * This method returns an integer whose sign is that of calling {@code compareTo}
     * on the Unicode case folded version of the strings. Unicode Case folding
     * eliminates differences in case according to the Unicode Standard, using the
     * mappings defined in
     * <a href="https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt">CaseFolding.txt</a>,
     * including 1:M mappings, such as {@code "ß"} → {@code "ss"}.
     * <p>
     * Case folding is a locale-independent, language-neutral form of case mapping,
     * primarily intended for caseless matching. Unlike {@link #compareToIgnoreCase(String)},
     * which applies a simpler locale-insensitive uppercase mapping, this method
     * follows the Unicode <em>{@index "full"}</em> case folding, providing stable and
     * consistent results across all environments.
     * <p>
     * Note that this method does <em>not</em> take locale into account, and may
     * produce results that differ from locale-sensitive ordering. Use
     * {@link java.text.Collator} for locale-sensitive comparison.
     *
     * @apiNote
     * This method is the Unicode-compliant alternative to
     * {@link #compareToIgnoreCase(String)}. It implements the
     * <em>{@index "full case folding"}</em> as defined by the Unicode Standard, which
     * may differ from the simpler per-character mapping performed by
     * {@code compareToIgnoreCase}.
     * For example:
     * {@snippet lang=java :
     * String a = "Fuß";
     * String b = "FUSS";
     * int cmpFoldCase = a.compareToFoldCase(b);     // returns 0
     * int cmpIgnoreCase = a.compareToIgnoreCase(b); // returns > 0
     * }
     *
     * @param   str   the {@code String} to be compared.
     * @return  a negative integer, zero, or a positive integer as the specified
     *          String is greater than, equal to, or less than this String,
     *          ignoring case considerations by case folding.
     *
     * @spec    https://www.unicode.org/versions/latest/core-spec/chapter-5/#G21790 Unicode Caseless Matching
     * @see     java.text.Collator
     * @see     #compareToIgnoreCase(String)
     * @see     #equalsFoldCase(String)
     * @since   26
     */
    public int compareToFoldCase(String str) {
        // Delegates to the shared UNICODE_CASEFOLD_ORDER comparator.
        return UNICODE_CASEFOLD_ORDER.compare(this, str);
    }
    /**
     * Tests if two string regions are equal.
     * <p>
--- a/src/java.base/share/classes/java/lang/StringLatin1.java
+++ b/src/java.base/share/classes/java/lang/StringLatin1.java
@ -32,6 +32,8 @@ import java.util.function.Consumer;
 import java.util.function.IntConsumer;
 import java.util.stream.Stream;
 import java.util.stream.StreamSupport;
 import jdk.internal.lang.CaseFolding;
 import jdk.internal.util.ArraysSupport;
 import jdk.internal.vm.annotation.IntrinsicCandidate;
@ -179,6 +181,128 @@ final class StringLatin1 {
        return len1 - len2;
    }
    /**
     * Compares two Latin-1 ranges, expanding every {@code 0xdf} into two
     * {@code 's'} characters while scanning.
     *
     * @param value first byte array
     * @param off   index in {@code value} to start at
     * @param last  end index (exclusive) in {@code value}
     * @param other second byte array
     * @param ooff  index in {@code other} to start at
     * @param olast end index (exclusive) in {@code other}
     * @return the difference of the lower-cased characters at the first
     *         mismatch; otherwise {@code 1} if only the first range has
     *         characters left, {@code -1} if only the second has, and
     *         {@code 0} if both are exhausted
     */
    private static int compareToFC0(byte[] value, int off, int last, byte[] other, int ooff, int olast) {
        final int sharpS = 0xdf;
        final int smallS = 0x73;
        int i = off;
        int j = ooff;
        // True while the second 's' of an expanded 0xdf is still to be delivered.
        boolean pending1 = false;
        boolean pending2 = false;
        while (true) {
            final boolean more1 = i < last || pending1;
            final boolean more2 = j < olast || pending2;
            if (!(more1 && more2)) {
                if (more1) {
                    return 1;
                }
                return more2 ? -1 : 0;
            }
            int c1;
            if (pending1) {
                c1 = smallS;
                pending1 = false;
            } else {
                c1 = getChar(value, i++);
                if (c1 == sharpS) {
                    c1 = smallS;
                    pending1 = true;
                }
            }
            int c2;
            if (pending2) {
                c2 = smallS;
                pending2 = false;
            } else {
                c2 = getChar(other, j++);
                if (c2 == sharpS) {
                    c2 = smallS;
                    pending2 = true;
                }
            }
            if (!CharacterDataLatin1.equalsIgnoreCase((byte)c1, (byte)c2)) {
                return Character.toLowerCase(c1) - Character.toLowerCase(c2);
            }
        }
    }
    /**
     * Case-folding comparison of two Latin-1 byte arrays.
     * <p>
     * Positions are compared pairwise. At the first pair that is not equal
     * ignoring case, the result is the difference of the lower-cased
     * characters, unless one of them is {@code 0xdf}; in that case the rest
     * of the comparison is handed to {@code compareToFC0}. If all shared
     * positions match, the length difference is returned.
     */
    static int compareToFC(byte[] value, byte[] other) {
        final int len = value.length;
        final int olen = other.length;
        final int common = Math.min(len, olen);
        int k = 0;
        while (k < common) {
            final byte b1 = value[k];
            final byte b2 = other[k];
            if (CharacterDataLatin1.equalsIgnoreCase(b1, b2)) {
                k++;
                continue;
            }
            final int c1 = Byte.toUnsignedInt(b1);
            final int c2 = Byte.toUnsignedInt(b2);
            if (c1 != 0xdf && c2 != 0xdf) {
                return Character.toLowerCase(c1) - Character.toLowerCase(c2);
            }
            // 0xdf is the only 1:M in latin1 range
            return compareToFC0(value, k, len, other, k, olen);
        }
        return len - olen;
    }
    /**
     * Compares a Latin-1 range with a UTF-16 range by their case foldings,
     * expanding 1:M foldings while scanning.
     * <p>
     * When a folding is not a single code point, its low 16 bits are used as
     * the current character and the remaining 16-bit units are kept in
     * {@code f1}/{@code f2}, to be consumed before the next input character
     * is read.
     *
     * @param value Latin-1 byte array
     * @param off   index in {@code value} to start at
     * @param last  end index (exclusive) in {@code value}
     * @param other UTF-16 byte array
     * @param ooff  char index in {@code other} to start at
     * @param olast end char index (exclusive) in {@code other}
     * @return the difference of the folded values at the first mismatch;
     *         otherwise {@code 1} if only the first range has characters
     *         left, {@code -1} if only the second has, and {@code 0} if both
     *         are exhausted
     */
    private static int compareToFC0_UTF16(byte[] value, int off, int last, byte[] other, int ooff, int olast) {
        int f1 = 0, f2 = 0;
        int k1 = off, k2 = ooff;
        while ((k1 < last || f1 != 0) && (k2 < olast || f2 != 0)) {
            int c1, c2;
            if (f1 != 0) {
                c1 = (f1 & 0xffff); f1 >>>= 16;
            } else {
                c1 = getChar(value, k1++);
                var f = CaseFolding.fold(c1);
                if (CaseFolding.isSingleCodePoint(f)) {
                    // Code points go up to U+10FFFF, which needs 21 bits.
                    c1 = (int)(f & 0x1fffff);
                } else {
                    c1 = (int)f & 0xffff;
                    f1 = (int)(f >>> 16);
                }
            }
            if (f2 != 0) {
                c2 = f2 & 0xffff; f2 >>>= 16;
            } else {
                c2 = StringUTF16.codePointAt(other, k2, olast, true);
                k2 += Character.charCount(c2);
                var f = CaseFolding.fold(c2);
                if (CaseFolding.isSingleCodePoint(f)) {
                    // A 20-bit mask would truncate U+100000..U+10FFFF to
                    // U+0000..U+FFFF and make them compare equal to unrelated
                    // BMP characters.
                    // NOTE(review): assumes fold() returns the code point
                    // itself when no folding is defined - confirm.
                    c2 = (int)(f & 0x1fffff);
                } else {
                    c2 = (int)(f & 0xffff);
                    f2 = (int)(f >>> 16);
                }
            }
            if (c1 != c2) {
                return c1 - c2;
            }
        }
        if (k1 < last || f1 != 0) {
            return 1;
        }
        if (k2 < olast || f2 != 0) {
            return -1;
        }
        return 0;
    }
    // latin1 vs utf16
    /**
     * Case-folding comparison of a Latin-1 byte array ({@code value}) with a
     * UTF-16 byte array ({@code other}).
     * <p>
     * Positions are compared pairwise. At the first pair whose foldings
     * differ, the result is the difference of the two foldings when both are
     * single code points; otherwise the rest of the comparison is handed to
     * {@code compareToFC0_UTF16}. If all shared positions match, the length
     * difference is returned.
     */
    static int compareToFC_UTF16(byte[] value, byte[] other) {
        final int len = length(value);
        final int olen = StringUTF16.length(other);
        final int common = Math.min(len, olen);
        for (int i = 0; i < common; i++) {
            final int left = getChar(value, i);
            final int right = StringUTF16.codePointAt(other, i, olen, true);
            if (left == right) {
                continue;
            }
            final long foldedLeft = CaseFolding.fold(left);
            final long foldedRight = CaseFolding.fold(right);
            if (foldedLeft == foldedRight) {
                continue;
            }
            final boolean bothSingle = CaseFolding.isSingleCodePoint(foldedLeft)
                    && CaseFolding.isSingleCodePoint(foldedRight);
            return bothSingle
                    ? (int)(foldedLeft - foldedRight)
                    : compareToFC0_UTF16(value, i, len, other, i, olen);
        }
        return len - olen;
    }
    // Hashes the whole array by delegating to ArraysSupport.hashCodeOfUnsigned
    // with an initial value of 0.
    static int hashCode(byte[] value) {
        return ArraysSupport.hashCodeOfUnsigned(value, 0, value.length, 0);
    }
--- a/src/java.base/share/classes/java/lang/StringUTF16.java
+++ b/src/java.base/share/classes/java/lang/StringUTF16.java
@ -34,6 +34,7 @@ import java.util.function.IntConsumer;
 import java.util.stream.Stream;
 import java.util.stream.StreamSupport;
 import jdk.internal.lang.CaseFolding;
 import jdk.internal.misc.Unsafe;
 import jdk.internal.util.ArraysSupport;
 import jdk.internal.vm.annotation.ForceInline;
@ -93,7 +94,7 @@ final class StringUTF16 {
        return value.length >> 1;
    }
-    private static int codePointAt(byte[] value, int index, int end, boolean checked) {
+    static int codePointAt(byte[] value, int index, int end, boolean checked) {
        assert index < end;
        if (checked) {
            checkIndex(index, value);
@ -592,6 +593,77 @@ final class StringUTF16 {
        return -StringLatin1.compareToCI_UTF16(other, value);
    }
    // Case-folding comparison of a UTF-16 array (value) with a Latin-1 array
    // (other): delegates to StringLatin1.compareToFC_UTF16 with the operands
    // swapped and negates the result.
    public static int compareToFC_Latin1(byte[] value, byte[] other) {
        return -StringLatin1.compareToFC_UTF16(other, value);
    }
    /**
     * Compares two UTF-16 ranges by their case foldings, expanding 1:M
     * foldings while scanning.
     * <p>
     * When a folding is not a single code point, its low 16 bits are used as
     * the current character and the remaining 16-bit units are kept in
     * {@code f1}/{@code f2}, to be consumed before the next input character
     * is read.
     *
     * @param value first UTF-16 byte array
     * @param off   char index in {@code value} to start at
     * @param last  end char index (exclusive) in {@code value}
     * @param other second UTF-16 byte array
     * @param ooff  char index in {@code other} to start at
     * @param olast end char index (exclusive) in {@code other}
     * @return the difference of the folded values at the first mismatch;
     *         otherwise {@code 1} if only the first range has characters
     *         left, {@code -1} if only the second has, and {@code 0} if both
     *         are exhausted
     */
    private static int compareToFC0(byte[] value, int off, int last, byte[] other, int ooff, int olast) {
        int f1 = 0, f2 = 0;
        int k1 = off, k2 = ooff;
        while ((k1 < last || f1 != 0) && (k2 < olast || f2 != 0)) {
            int c1, c2;
            if (f1 != 0) {
                c1 = f1 & 0xffff; f1 >>>= 16;
            } else {
                c1 = StringUTF16.codePointAt(value, k1, last, true);
                k1 += Character.charCount(c1);
                var f = CaseFolding.fold(c1);
                if (CaseFolding.isSingleCodePoint(f)) {
                    // Code points go up to U+10FFFF, which needs 21 bits. A
                    // 20-bit mask would truncate U+100000..U+10FFFF to
                    // U+0000..U+FFFF and make them compare equal to unrelated
                    // BMP characters.
                    // NOTE(review): assumes fold() returns the code point
                    // itself when no folding is defined - confirm.
                    c1 = (int)(f & 0x1fffff);
                } else {
                    c1 = (int)(f & 0xffff);
                    // Unsigned shift, matching the handling of the second operand.
                    f1 = (int)(f >>> 16);
                }
            }
            if (f2 != 0) {
                c2 = f2 & 0xffff; f2 >>>= 16;
            } else {
                c2 = StringUTF16.codePointAt(other, k2, olast, true);
                k2 += Character.charCount(c2);
                var f = CaseFolding.fold(c2);
                if (CaseFolding.isSingleCodePoint(f)) {
                    c2 = (int)(f & 0x1fffff);
                } else {
                    c2 = (int)(f & 0xffff);
                    f2 = (int)(f >>> 16);
                }
            }
            if (c1 != c2) {
                return c1 - c2;
            }
        }
        if (k1 < last || f1 != 0) {
            return 1;
        }
        if (k2 < olast || f2 != 0) {
            return -1;
        }
        return 0;
    }
    /**
     * Case-folding comparison of two UTF-16 byte arrays.
     * <p>
     * Code points are read from the same char index of both arrays. At the
     * first pair whose foldings differ, the result is the difference of the
     * two foldings when both are single code points; otherwise the rest of
     * the comparison is handed to {@code compareToFC0}. If all shared
     * positions match, the length difference is returned.
     */
    public static int compareToFC(byte[] value, byte[] other) {
        final int len = length(value);
        final int olen = length(other);
        final int common = Math.min(len, olen);
        for (int i = 0; i < common; ) {
            final int left = codePointAt(value, i, len, true);
            final int right = codePointAt(other, i, olen, true);
            if (left != right) {
                final long foldedLeft = CaseFolding.fold(left);
                final long foldedRight = CaseFolding.fold(right);
                if (foldedLeft != foldedRight) {
                    final boolean bothSingle = CaseFolding.isSingleCodePoint(foldedLeft)
                            && CaseFolding.isSingleCodePoint(foldedRight);
                    return bothSingle
                            ? (int) foldedLeft - (int) foldedRight
                            : compareToFC0(value, i, len, other, i, olen);
                }
            }
            // Advance by the char count of the code point read from value.
            i += Character.charCount(left);
        }
        return len - olen;
    }
    // Hashes the whole array by delegating to ArraysSupport.hashCodeOfUTF16
    // with an initial value of 0; the length passed is value.length >> 1,
    // i.e. half the number of bytes.
    static int hashCode(byte[] value) {
        return ArraysSupport.hashCodeOfUTF16(value, 0, value.length >> 1, 0);
    }
--- a/src/java.base/share/classes/java/util/regex/Pattern.java
+++ b/src/java.base/share/classes/java/util/regex/Pattern.java
@ -43,8 +43,8 @@ import java.util.function.Predicate;
 import java.util.stream.Stream;
 import java.util.stream.StreamSupport;
 import jdk.internal.lang.CaseFolding;
 import jdk.internal.util.ArraysSupport;
 import jdk.internal.util.regex.CaseFolding;
 import jdk.internal.util.regex.Grapheme;
 /**
--- a/src/java.base/share/classes/jdk/internal/lang/CaseFolding.java.template
+++ b/src/java.base/share/classes/jdk/internal/lang/CaseFolding.java.template
@ -0,0 +1,208 @@
 /*
 * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
 package jdk.internal.lang;
 import java.util.Arrays;
 import java.util.Map;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
 import static java.util.Map.entry;
 /**
 * Utility class that handles Unicode case folding properties defined in
 * CaseFolding.txt, including 1:M full case folding.
 */
 public final class CaseFolding {
    private CaseFolding()  {}
   /**
    * Reports whether a case folding mapping entry exists for a code point.
    *
    * @param  cp
    *         the Unicode code point to look up
    * @return {@code true} if the lookup for {@code cp} finds a case folding
    *         mapping entry, {@code false} if the lookup misses
    */
    public static boolean isDefined(int cp) {
        final long mapping = getDefined(cp);
        return mapping != -1;
    }
   /**
    * Folds a single code point according to the Unicode case folding mappings.
    * <p>
    * A code point without a case folding mapping entry is returned unchanged.
    * Otherwise the stored mapping is returned as a packed {@code long}, shown
    * below as four 16-bit groups with the most significant group first:
    *
    *  +---+---------+--------+---------+--------+--------+
    *  | 1:1 mapping |  0000  |   0000  |  000x  |  xxxx  |  0041 => 0061 or 1E921 => 1E943
    *  +---+---------+--------+---------+--------+--------+
    *  | 1:2 mapping |  0002  |   0000  |  xxxx  |  xxxx  |  FB02 => 0066 006C
    *  +---+---------+--------+---------+--------+--------+
    *  | 1:3 mapping |  0003  |   xxxx  |  xxxx  |  xxxx  |  FB03 => 0066 0066 0069
    *  +---+---------+--------+---------+--------+--------+
    *
    * The most significant group is {@code 0000} for a 1:1 mapping, which is the
    * condition {@link #isSingleCodePoint(long)} tests.
    *
    * @param  cp
    *         the Unicode code point to fold
    * @return the code point itself if no mapping entry is found, otherwise the
    *         packed mapping value in the layout shown above
    */
    public static long fold(int cp) {
        final long mapping = getDefined(cp);
        if (mapping == -1) {
            return cp;      // lookup miss: the code point folds to itself
        }
        return mapping;
    }
    /**
     * Tests whether a packed folding value has all of its upper 16 bits
     * (bits 48..63) clear.
     *
     * @param  fold
     *         a packed folding value
     * @return {@code true} if bits 48..63 of {@code fold} are all zero,
     *         {@code false} otherwise (including for negative values)
     */
    public static boolean isSingleCodePoint(long fold) {
        final long upperGroup = fold & 0xFFFF_0000_0000_0000L;
        return upperGroup == 0L;
    }
   /**
    * Returns an expansion set to "close" a given regex Unicode character class range for case-sensitive
    * matching, according to the
    * <a href="https://www.unicode.org/reports/tr18/#Simple_Loose_Matches">Simple Loose Matches</a>
    * rule defined in Unicode Technical Standard #18: Unicode Regular Expressions.
    * <p>
    * To conform with Level 1 of UTS #18, specifically RL1.5: Simple Loose Matches, simple case folding must
    * be applied to literals and (optionally) to character classes. When applied to character classes, each
    * character class is expected to be closed under simple case folding. See the standard for the
    * detailed explanation and example of "closed".
    * <p>
    * RL1.5 states: To meet this requirement, an implementation that supports case-sensitive matching should
    * <ol>
    * <li>Provide at least the simple, default Unicode case-insensitive matching, and</li>
    * <li>Specify which character properties or constructs are closed under the matching.</li>
    * </ol>
    * <p>
    * In the {@code Pattern} implementation, 5 types of constructs may be case-sensitive when matching:
    * back-refs, string slices (sequences), singles, families (char-properties) and class ranges. Singles
    * and families may appear independently or within a class.
    * <p>
    * For loose/case-insensitive matching, the back-refs, slices and singles apply {@code toUpperCase} and
    * {@code toLowerCase} to both the pattern and the input string. This effectively 'closes' the class for
    * matching.
    * <p>
    * The family/char-properties are not "closed" and should remain unchanged. This is acceptable per RL1.5,
    * if their behavior is clearly specified.
    * <p>
    * This method addresses that requirement for the "range" construct within a character class by computing
    * the additional characters that should be included to close the range under simple case folding:
    * <p>
    * For each character in the input range {@code [start, end]} (inclusive), if the character has a simple
    * case folding mapping in Unicode's CaseFolding.txt, the mapping is not a round-trip map, and the mapped
    * character is not already in the range, then that mapped character (typically lowercase) is added to
    * the expansion set.
    * <p>
    * This allows regex character class "range" implementation to use the returned expansion set to support
    * additional case-insensitive matching, without duplicating characters already covered by the existing
    * regex range implementation. The expectation is the matching is done using both the uppercase and
    * lowercase forms of the input character, for example
    *
    * <pre>{@code
    *
    *     ch -> inRange(lower, Character.toUpperCase(ch), upper) ||
    *           inRange(lower, Character.toLowerCase(ch), upper) ||
    *           additionalClosingCharacters.contains(Character.toUpperCase(ch)) ||
    *           additionalClosingCharacters.contains(Character.toLowerCase(ch))
    * }</pre>
    *
    * @param start the starting code point of the character range
    * @param end the ending code point of the character range
    * @return a {@code int[]} containing the all simple case equivalents of characters in the range, excluding
    *         those already in the range
    * @spec https://www.unicode.org/reports/tr18/#Simple_Loose_Matches
    */
    public static int[] getClassRangeClosingCharacters(int start, int end) {
        final int[] candidates = expanded_case_cps;
        // Worst case: every candidate contributes one closing character.
        final int[] closing = new int[candidates.length];
        int count = 0;
        for (int i = 0; i < candidates.length; i++) {
            final int cp = candidates[i];
            if (cp < start || cp > end) {
                continue;       // candidate lies outside [start, end]
            }
            final int mapped = expanded_case_map.get(cp);
            if (mapped >= start && mapped <= end) {
                continue;       // mapped character is already inside the range
            }
            closing[count++] = mapped;
        }
        // Trim the scratch array to the number of characters actually collected.
        return Arrays.copyOf(closing, count);
    }
    // Code point to mapped code point table read by getClassRangeClosingCharacters();
    // its entries are supplied through the template placeholder on the next line.
    private static final Map<Integer, Integer> expanded_case_map = Map.ofEntries(
 %%%Expanded_Case_Map_Entries
    );
    // Keys of expanded_case_map unboxed into an int array, in the key set's iteration order.
    private static final int[] expanded_case_cps = expanded_case_map.keySet()
      .stream()
      .mapToInt(Integer::intValue)
      .toArray();
    // Column indices into each 3-element row of the table built by hashKeys().
    private static final int HASH_CP = 0;      // code point stored in the row; 0 marks an unused row
    private static final int HASH_INDEX = 1;   // position of that code point in the key array passed to hashKeys()
    private static final int HASH_NEXT = 2;    // row index of the next entry in the collision chain; 0 ends the chain
    /**
     * Builds a chained hash table over the given keys.
     * <p>
     * The first {@code keys.length} rows are the primary buckets, addressed by
     * {@code key % keys.length}. A key whose bucket chain is already occupied is
     * placed in the next free overflow row (overflow rows start at index
     * {@code keys.length}) and linked from the end of that chain through
     * {@code HASH_NEXT}. Each occupied row records the key in {@code HASH_CP} and
     * the key's position in {@code keys} in {@code HASH_INDEX}. A value of 0 in
     * {@code HASH_CP} marks an unused row and 0 in {@code HASH_NEXT} ends a chain.
     *
     * @param  keys the keys to hash
     * @return the table, trimmed to the primary buckets plus the overflow rows used
     */
    private static int[][] hashKeys(int[] keys) {
        final int bucketCount = keys.length;
        int[][] table = new int[bucketCount << 1][3];  // cp + hash + next
        int nextFree = bucketCount;
        for (int index = 0; index < bucketCount; index++) {
            final int cp = keys[index];
            int slot = cp % bucketCount;
            while (table[slot][HASH_CP] != 0) {
                final int link = table[slot][HASH_NEXT];
                if (link != 0) {
                    slot = link;        // keep walking the chain
                    continue;
                }
                // End of chain reached: append a fresh overflow row.
                table[slot][HASH_NEXT] = nextFree;
                slot = nextFree++;
                break;
            }
            table[slot][HASH_CP] = cp;
            table[slot][HASH_INDEX] = index;
        }
        return Arrays.copyOf(table, nextFree);
    }
    /**
     * Looks up the packed case folding value stored for a code point.
     * <p>
     * The starting bucket is {@code cp % CASE_FOLDING_CPS.length}; collisions are
     * resolved by following the {@code HASH_NEXT} links until a row whose
     * {@code HASH_CP} equals {@code cp} is found or the chain ends.
     *
     * @param  cp the code point to look up
     * @return the {@code CASE_FOLDING_VALUES} element for {@code cp}, or
     *         {@code -1} if {@code cp} has no entry
     */
    private static long getDefined(int cp) {
        // 0 is the "unused row" marker in the table, so U+0000 would falsely match
        // an empty bucket, and a negative value would yield a negative bucket index.
        // Neither can have an entry, so report a miss right away.
        if (cp <= 0) {
            return -1;
        }
        var hashes = CASE_FOLDING_HASHES;
        var length = CASE_FOLDING_CPS.length;  // hashed based on total defined.
        var hash = cp % length;
        while (hashes[hash][HASH_CP] != cp) {
            var next = hashes[hash][HASH_NEXT];
            if (next == 0) {
                return -1;   // hash miss
            }
            hash = next;
        }
        var index = hashes[hash][HASH_INDEX];
        return CASE_FOLDING_VALUES[index];
    }
 %%%Entries
    // Hash table built by hashKeys() over CASE_FOLDING_CPS; getDefined() probes it.
    private static final int[][] CASE_FOLDING_HASHES = hashKeys(CASE_FOLDING_CPS);
 }
--- a/src/java.base/share/classes/jdk/internal/util/regex/CaseFolding.java.template
+++ b/src/java.base/share/classes/jdk/internal/util/regex/CaseFolding.java.template
@ -1,116 +0,0 @@
 /*
 * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
 package jdk.internal.util.regex;
 import java.util.Arrays;
 import java.util.Map;
 import java.util.Objects;
 import static java.util.Map.entry;
 public final class CaseFolding {
    // Code point to mapped code point table read by getClassRangeClosingCharacters();
    // its entries are supplied through the template placeholder on the next line.
    private static final Map<Integer, Integer> expanded_case_map = Map.ofEntries(
 %%%Entries
    );
    // Keys of expanded_case_map unboxed into an int array, in the key set's iteration order.
    private static final int[] expanded_case_cps = expanded_case_map.keySet()
      .stream()
      .mapToInt(Integer::intValue)
      .toArray();
    private CaseFolding()  {}
    /**
     * Returns an expansion set to "close" a given regex Unicode character class range for case-sensitive
     * matching, according to the
     * <a href="https://www.unicode.org/reports/tr18/#Simple_Loose_Matches">Simple Loose Matches</a>
     * rule defined in Unicode Technical Standard #18: Unicode Regular Expressions.
     * <p>
     * To conform with Level 1 of UTS #18, specifically RL1.5: Simple Loose Matches, simple case folding must
     * be applied to literals and (optionally) to character classes. When applied to character classes, each
     * character class is expected to be closed under simple case folding. See the standard for the
     * detailed explanation and example of "closed".
     * <p>
     * RL1.5 states: To meet this requirement, an implementation that supports case-sensitive matching should
     * <ol>
     * <li>Provide at least the simple, default Unicode case-insensitive matching, and</li>
     * <li>Specify which character properties or constructs are closed under the matching.</li>
     * </ol>
     * <p>
     * In the {@code Pattern} implementation, 5 types of constructs may be case-sensitive when matching:
     * back-refs, string slices (sequences), singles, families (char-properties) and class ranges. Singles
     * and families may appear independently or within a class.
     * <p>
     * For loose/case-insensitive matching, the back-refs, slices and singles apply {@code toUpperCase} and
     * {@code toLowerCase} to both the pattern and the input string. This effectively 'closes' the class for
     * matching.
     * <p>
     * The family/char-properties are not "closed" and should remain unchanged. This is acceptable per RL1.5,
     * if their behavior is clearly specified.
     * <p>
     * This method addresses that requirement for the "range" construct within a character class by computing
     * the additional characters that should be included to close the range under simple case folding:
     * <p>
     * For each character in the input range {@code [start, end]} (inclusive), if the character has a simple
     * case folding mapping in Unicode's CaseFolding.txt, the mapping is not a round-trip map, and the mapped
     * character is not already in the range, then that mapped character (typically lowercase) is added to
     * the expansion set.
     * <p>
     * This allows regex character class "range" implementation to use the returned expansion set to support
     * additional case-insensitive matching, without duplicating characters already covered by the existing
     * regex range implementation. The expectation is the matching is done using both the uppercase and
     * lowercase forms of the input character, for example
     *
     * <pre>{@code
     *
     *     ch -> inRange(lower, Character.toUpperCase(ch), upper) ||
     *           inRange(lower, Character.toLowerCase(ch), upper) ||
     *           additionalClosingCharacters.contains(Character.toUpperCase(ch)) ||
     *           additionalClosingCharacters.contains(Character.toLowerCase(ch))
     * }</pre>
     *
     * <p>
     * @spec https://www.unicode.org/reports/tr18/#Simple_Loose_Matches
     * @param start the starting code point of the character range
     * @param end the ending code point of the character range
     * @return a {@code int[]} containing the all simple case equivalents of characters in the range, excluding
     *         those already in the range
     */
    public static int[] getClassRangeClosingCharacters(int start, int end) {
        final int[] candidates = expanded_case_cps;
        // Scratch buffer sized for the case where every candidate contributes.
        final int[] closing = new int[candidates.length];
        int count = 0;
        for (int i = 0; i < candidates.length; i++) {
            final int cp = candidates[i];
            final boolean inRange = cp >= start && cp <= end;
            if (!inRange) {
                continue;
            }
            final int mapped = expanded_case_map.get(cp);
            final boolean mappedInRange = mapped >= start && mapped <= end;
            if (!mappedInRange) {
                closing[count++] = mapped;
            }
        }
        return Arrays.copyOf(closing, count);
    }
 }
--- a/test/jdk/java/lang/String/UnicodeCaseFoldingTest.java
+++ b/test/jdk/java/lang/String/UnicodeCaseFoldingTest.java
@ -0,0 +1,329 @@
 /*
 * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
 /**
 * @test
 * @summary tests unicode case-folding based String comparison and equality
 * @bug 4397357
 * @library /lib/testlibrary/java/lang
 * @modules java.base/jdk.internal.lang:+open
 * @run junit/othervm
 * UnicodeCaseFoldingTest
 */
 import java.nio.file.Files;
 import java.util.stream.Stream;
 import java.util.stream.Collectors;
 import java.util.ArrayList;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.Arguments;
 import org.junit.jupiter.params.provider.MethodSource;
 import org.junit.jupiter.api.Test;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import jdk.internal.lang.CaseFolding;
 public class UnicodeCaseFoldingTest {
    /**
     * Checks every C (common) and F (full) line of CaseFolding.txt: folding the
     * listed code point must produce the listed sequence, and the source and the
     * expected folding must compare as equal in both directions.
     */
    @Test
    void testAllCommnFullCodePointsListedInCaseFoldinigTxt() throws Throwable {
        var filter = "^.*; [CF]; .*$";  // C=common, F=full, for full case folding
        // Files.lines keeps the file open until the stream is closed, so scope it.
        try (var lines = Files.lines(UCDFiles.CASEFOLDING)) {
            lines.filter(line -> !line.startsWith("#") && line.matches(filter))
                    .forEach(line -> {
                        var fields = line.split("; ");
                        var cp = Integer.parseInt(fields[0], 16);
                        fields = fields[2].trim().split(" ");
                        var folding = new int[fields.length];
                        for (int i = 0; i < folding.length; i++) {
                            folding[i] = Integer.parseInt(fields[i], 16);
                        }
                        var source = new String(Character.toChars(cp));
                        var expected = new String(folding, 0, folding.length);
                        // (1) Verify the folding result matches expected
                        assertEquals(expected, foldCase(source), "CaseFolding.fold(): ");
                        // (2) Verify compareToFoldCase() result
                        assertEquals(0, source.compareToFoldCase(expected), "source.compareToFoldCase(expected)");
                        assertEquals(0, expected.compareToFoldCase(source), "expected.compareToFoldCase(source)");
                        // (3) Verify equalsFoldCase() result
                        assertEquals(true, source.equalsFoldCase(expected), "source.equalsFoldCase(expected)");
                        assertEquals(true, expected.equalsFoldCase(source), "expected.equalsFoldCase(source)");
                    });
        }
    }
    /**
     * Checks every S (simple) line of CaseFolding.txt: the listed code point and
     * its listed simple folding must still compare as equal in both directions.
     */
    @Test
    void testAllSimpleCodePointsListedInCaseFoldinigTxt() throws Throwable {
        // S=simple, for simple case folding. The simple case folding should still match.
        var filter = "^.*; [S]; .*$";
        // Files.lines keeps the file open until the stream is closed, so scope it.
        try (var lines = Files.lines(UCDFiles.CASEFOLDING)) {
            lines.filter(line -> !line.startsWith("#") && line.matches(filter))
                    .forEach(line -> {
                        var fields = line.split("; ");
                        var cp = Integer.parseInt(fields[0], 16);
                        fields = fields[2].trim().split(" ");
                        var folding = new int[fields.length];
                        for (int i = 0; i < folding.length; i++) {
                            folding[i] = Integer.parseInt(fields[i], 16);
                        }
                        var source = new String(Character.toChars(cp));
                        var expected = new String(folding, 0, folding.length);
                        // (1) Verify compareToFoldCase() result
                        assertEquals(0, source.compareToFoldCase(expected), "source.compareToFoldCase(expected)");
                        assertEquals(0, expected.compareToFoldCase(source), "expected.compareToFoldCase(source)");
                        // (2) Verify equalsFoldCase() result
                        assertEquals(true, source.equalsFoldCase(expected), "source.equalsFoldCase(expected)");
                        assertEquals(true, expected.equalsFoldCase(source), "expected.equalsFoldCase(source)");
                    });
        }
    }
    /**
     * Checks that every defined, non-surrogate code point in U+0000..U+1FFFF that
     * has no C or F line in CaseFolding.txt folds to itself.
     */
    @Test
    public void testAllCodePointsFoldToThemselvesIfNotListed() throws Exception {
        // Collect all code points that appear in CaseFolding.txt; the stream is
        // closed as soon as the set is built so the file handle is not leaked.
        java.util.Set<Integer> listed;
        try (var lines = Files.lines(UCDFiles.CASEFOLDING)) {
            listed = lines
                    .filter(line -> !line.startsWith("#") && line.matches("^.*; [CF]; .*$"))
                    .map(line -> Integer.parseInt(line.split("; ")[0], 16))
                    .collect(Collectors.toSet());
        }
        var failures = new ArrayList<String>();
        // Scan BMP + Supplementary Plane 1 (U+0000..U+1FFFF)
        for (int cp = Character.MIN_CODE_POINT; cp <= 0x1FFFF; cp++) {
            if (!Character.isDefined(cp)) {
                continue;     // skip undefined
            }
            // Compare as int: casting to char would truncate U+1D800..U+1DFFF into
            // the surrogate range and silently exclude them from the scan.
            if (cp >= Character.MIN_SURROGATE && cp <= Character.MAX_SURROGATE) {
                continue; // skip surrogate code units
            }
            if (listed.contains(cp)) {
                continue;          // already tested separately
            }
            String s = new String(Character.toChars(cp));
            String folded = foldCase(s);
            if (!s.equals(folded)) {
                failures.add(String.format("Unexpected folding: U+%04X '%s' → '%s'", cp, s, folded));
            }
        }
        assertEquals(0, failures.size(),
                () -> "Some unlisted code points folded unexpectedly:\n"
                        + String.join("\n", failures));
    }
    /** Folds each input supplied by {@code caseFoldTestCases} and compares it with the expected string. */
    @ParameterizedTest(name = "CaseFold \"{0}\" → \"{1}\"")
    @MethodSource("caseFoldTestCases")
    void testIndividualCaseFolding(String input, String expected) {
        final String actual = foldCase(input);
        assertEquals(expected, actual);
    }
    /**
     * Supplies (input, expected folded form) pairs for
     * {@code testIndividualCaseFolding}.
     */
    static Stream<Arguments> caseFoldTestCases() {
        return Stream.of(
                // --- ASCII simple cases ---
                Arguments.of("ABC", "abc"),
                Arguments.of("already", "already"),
                Arguments.of("MiXeD123", "mixed123"),
                // --- Latin-1 to non-Latin-1 fold ---
                Arguments.of("aBc\u00B5Efg", "abc\u03BCefg"), // "µ" → "μ"
                Arguments.of("test\u00B5\ud801\udc00X", "test\u03bc\ud801\udc28x"),
                // --- German Eszett ---
                Arguments.of("Stra\u00DFe", "strasse"), // "Straße"
                Arguments.of("\u1E9E", "ss"), // "ẞ" capital sharp S
                // --- Turkish dotted I / dotless i ---
                Arguments.of("I", "i"),
                Arguments.of("\u0130", "i\u0307"), // capital dotted I → "i + dot above"
                Arguments.of("\u0069\u0307", "i\u0307"), // small i + dot above remains
                Arguments.of("\u0131", "\u0131"), // "ı" (dotless i stays dotless)
                // --- Greek special cases ---
                Arguments.of("\u039F\u03A3", "\u03BF\u03C3"), // "ΟΣ" → "οσ"  final sigma always folds to normal sigma
                Arguments.of("\u1F88", "\u1F00\u03B9"), // "ᾈ" → "ἀι"    Alpha with psili + ypogegrammeni
                Arguments.of("\u039C\u03AC\u03CA\u03BF\u03C2", "\u03BC\u03AC\u03CA\u03BF\u03C3"), // "Μάϊος" → "μάϊοσ"
                Arguments.of("\u1F08", "\u1F00"), //  Ἀ (Capital Alpha with psili) → ἀ
                // --- Supplementary Plane characters ---
                Arguments.of("\uD801\uDC00", "\uD801\uDC28"), // Deseret Capital Letter Long I → Small
                Arguments.of("\uD801\uDC01", "\uD801\uDC29"), // Deseret Capital Letter Long E → Small
                // --- Supplementary inside ASCII ---
                Arguments.of("abc\uD801\uDC00def", "abc\uD801\uDC28def"),
                // --- Ligatures and compatibility folds ---
                Arguments.of("\uFB00", "ff"), // ﬀ → ff
                Arguments.of("\uFB03", "ffi"), // ﬃ → ffi
                Arguments.of("\u212A", "k"), // Kelvin sign → k
                Arguments.of("abc\uFB00def", "abcffdef"), // ﬀ → ff
                Arguments.of("abc\uFB03def", "abcffidef"), // ﬃ → ffi
                Arguments.of("abc\u212Adef", "abckdef"), // Kelvin sign → k
                // --- Fullwidth ---
                Arguments.of("\uFF21\uFF22\uFF23", "\uFF41\uFF42\uFF43"), // "ＡＢＣ" → "ａｂｃ"
                // --- Armenian ---
                Arguments.of("\u0531", "\u0561"), // "Ա" → "ա"
                // --- Cherokee ---
                Arguments.of("\u13A0", "\u13A0"), // Capital Cherokee A folds to itself
                Arguments.of("\uAB70", "\u13A0") // Small Cherokee A folds to Capital Cherokee A
        );
    }
    /**
     * Supplies pairs of strings that {@code testcompareToFoldCaseEquals} expects
     * to be equal under case folding.
     */
    static Stream<Arguments> caseFoldEqualProvider() {
        return Stream.of(
                Arguments.of("abc", "ABC"),
                Arguments.of("aBcDe", "AbCdE"),
                Arguments.of("\u00C0\u00E7", "\u00E0\u00C7"), // Àç vs àÇ
                Arguments.of("straße", "STRASSE"), // ß → ss
                Arguments.of("\uD83C\uDDE6", "\uD83C\uDDE6"), // 🇦 vs 🇦 (identical strings)
                Arguments.of("\u1E9E", "ss"), // ẞ (capital sharp S)
                Arguments.of("\u03A3", "\u03C3"), // Σ vs σ (Greek Sigma)
                Arguments.of("\u03C3", "\u03C2"), // σ vs ς (Greek sigma/final sigma)
                Arguments.of("\u212B", "\u00E5"), // Å (Angstrom sign) vs å
                Arguments.of("\uFB00", "ff"), // ﬀ (ligature)
                Arguments.of("\u01C5", "\u01C5"), // ǅ (Latin capital D with small z with caron)
                Arguments.of("Caf\u00E9", "CAF\u00C9"), // Café vs CAFÉ
                Arguments.of("\u03BA\u03B1\u03BB\u03B7\u03BC\u03AD\u03C1\u03B1", "\u039A\u0391\u039B\u0397\u039C\u0388\u03A1\u0391"), // καλημέρα vs ΚΑΛΗΜΈΡΑ
                Arguments.of("\u4E2D\u56FD", "\u4E2D\u56FD"), // 中国 (identical strings)
                Arguments.of("\u03B1", "\u0391"), // α vs Α (Greek alpha)
                Arguments.of("\u212B", "\u00C5"), // Å (Angstrom sign) vs Å (A with ring above)
                // from StringCompareToIgnoreCase
                Arguments.of("\u0100\u0102\u0104\u0106\u0108", "\u0100\u0102\u0104\u0106\u0109"), // ĀĂĄĆĈ vs ĀĂĄĆĉ
                Arguments.of("\u0101\u0103\u0105\u0107\u0109", "\u0100\u0102\u0104\u0106\u0109"), // āăąćĉ vs ĀĂĄĆĉ
                Arguments.of("\ud801\udc00\ud801\udc01\ud801\udc02\ud801\udc03\ud801\udc04",
                        "\ud801\udc00\ud801\udc01\ud801\udc02\ud801\udc03\ud801\udc2c"), // 𐐀𐐁𐐂𐐃𐐄 vs 𐐀𐐁𐐂𐐃𐐬
                Arguments.of("\ud801\udc28\ud801\udc29\ud801\udc2a\ud801\udc2b\ud801\udc2c",
                        "\ud801\udc00\ud801\udc01\ud801\udc02\ud801\udc03\ud801\udc2c") // 𐐨𐐩𐐪𐐫𐐬 vs 𐐀𐐁𐐂𐐃𐐬
        );
    }
    /**
     * For each pair from {@code caseFoldEqualProvider}: both comparison
     * directions must yield 0, both equality directions must yield true, and the
     * folded forms must be identical.
     */
    @ParameterizedTest
    @MethodSource("caseFoldEqualProvider")
    void testcompareToFoldCaseEquals(String s1, String s2) {
        final int forward = s1.compareToFoldCase(s2);
        assertEquals(0, forward);
        final int backward = s2.compareToFoldCase(s1);
        assertEquals(0, backward);
        final boolean forwardEqual = s1.equalsFoldCase(s2);
        assertEquals(true, forwardEqual);
        final boolean backwardEqual = s2.equalsFoldCase(s1);
        assertEquals(true, backwardEqual);
        final String folded1 = foldCase(s1);
        final String folded2 = foldCase(s2);
        assertEquals(folded1, folded2);
    }
    /**
     * Supplies (s1, s2, expected sign) triples for
     * {@code testcompareToFoldCaseOrdering}; the third value is the expected
     * {@code Integer.signum} of {@code s1.compareToFoldCase(s2)}.
     */
    static Stream<Arguments> caseFoldOrderingProvider() {
        return Stream.of(
                Arguments.of("asa", "aß", -1), // ß → ss → "asa" < "ass"
                Arguments.of("aß", "asa", +1),
                Arguments.of("a\u00DF", "ass", 0), // aß vs ass
                Arguments.of("\uFB03", "ffi", 0), // ﬃ (ligature)
                Arguments.of("\u00C5", "Z", 1), // Å vs Z
                Arguments.of("A", "\u00C0", -1), // A vs À
                Arguments.of("\u03A9", "\u03C9", 0), // Ω vs ω
                Arguments.of("\u03C2", "\u03C3", 0), // ς vs σ
                Arguments.of("\uD835\uDD23", "R", 1), // U+1D523 (mathematical fraktur small f) vs R
                Arguments.of("\uFF26", "E", 1), // Ｆ (full-width F) vs E
                Arguments.of("\u00C9clair", "Eclair", 1), // Éclair vs Eclair
                Arguments.of("\u03bc\u00df", "\u00b5s", 1), // μß vs µs (micro sign)
                Arguments.of("\u00b5s", "\u03bc\u00df", -1) // same pair, reversed
        );
    }
    /** Checks the sign of {@code s1.compareToFoldCase(s2)} against the expected sign. */
    @ParameterizedTest
    @MethodSource("caseFoldOrderingProvider")
    void testcompareToFoldCaseOrdering(String s1, String s2, int expectedSign) {
        final int actualSign = Integer.signum(s1.compareToFoldCase(s2));
        assertEquals(expectedSign, actualSign);
    }
    /** Supplies the single-string inputs for {@code testCaseFoldRoundTrip}. */
    static Stream<Arguments> roundTripProvider() {
        return Stream.of(
                Arguments.of("abc"),
                Arguments.of("ABC"),
                Arguments.of("straße"),
                Arguments.of("Àç"),
                Arguments.of("aß"),
                Arguments.of("\uFB02uff"), // ﬂuff (ligature in "fluff")
                Arguments.of("\u00C9COLE") // ÉCOLE
        );
    }
    /**
     * A string and its folded form must compare as equal, in both directions,
     * with both {@code compareToFoldCase} and {@code equalsFoldCase}.
     */
    @ParameterizedTest
    @MethodSource("roundTripProvider")
    void testCaseFoldRoundTrip(String s) {
        final String folded = foldCase(s);
        final int forward = s.compareToFoldCase(folded);
        assertEquals(0, forward);
        final int backward = folded.compareToFoldCase(s);
        assertEquals(0, backward);
        final boolean forwardEqual = s.equalsFoldCase(folded);
        assertEquals(true, forwardEqual);
        final boolean backwardEqual = folded.equalsFoldCase(s);
        assertEquals(true, backwardEqual);
    }
    /**
     * Helper to test the integrity of the folding mapping: unpacks a packed
     * folding value into its code points.
     * <p>
     * Bits 48..63 hold the number of code points for a 1:M mapping, in which case
     * the code points are read as consecutive 16-bit units starting from the
     * least significant one. A count of zero means the value is one code point.
     *
     * @param value the packed folding value
     * @return the code points in the order they are unpacked
     */
    private static int[] longToFolding(long value) {
        int len = (int) (value >>> 48);
        if (len == 0) {
            // Code points reach U+10FFFF and need 21 bits; a 20-bit mask would
            // corrupt plane-16 code points that fold to themselves.
            return new int[]{(int) (value & 0x1FFFFF)};
        } else {
            var folding = new int[len];
            for (int i = 0; i < len; i++) {
                folding[i] = (int) (value & 0xFFFF);
                value >>= 16;
            }
            return folding;
        }
    }
    /**
     * Builds the case-folded form of a string with {@code CaseFolding}.
     * <p>
     * The leading run of code points without a folding entry is copied as is; if
     * that run covers the whole string, the argument itself is returned. Every
     * code point from the first one with an entry onwards is replaced by the code
     * points unpacked from {@code CaseFolding.fold}.
     */
    private static String foldCase(String s) {
        final int length = s.length();
        // Find the first code point that has a folding entry.
        int prefixEnd = 0;
        while (prefixEnd < length) {
            final int cp = s.codePointAt(prefixEnd);
            if (CaseFolding.isDefined(cp)) {
                break;
            }
            prefixEnd += Character.charCount(cp);
        }
        if (prefixEnd == length) {
            return s;
        }
        final StringBuilder out = new StringBuilder(length);
        out.append(s, 0, prefixEnd);
        int pos = prefixEnd;
        while (pos < length) {
            final int cp = s.codePointAt(pos);
            for (int folded : longToFolding(CaseFolding.fold(cp))) {
                out.appendCodePoint(folded);
            }
            pos += Character.charCount(cp);
        }
        return out.toString();
    }
 }
--- a/test/micro/org/openjdk/bench/java/lang/StringCompareToFoldCase.java
+++ b/test/micro/org/openjdk/bench/java/lang/StringCompareToFoldCase.java
@ -0,0 +1,200 @@
 /*
 * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
 package org.openjdk.bench.java.lang;
 import org.openjdk.jmh.annotations.*;
 import java.util.concurrent.TimeUnit;
 /**
  * Micro-benchmark comparing the case-folding string operations
  * ({@code String.compareToFoldCase} and {@code String.equalsFoldCase})
  * with the existing {@code String.compareToIgnoreCase} and
  * {@code String.equalsIgnoreCase} on the same inputs.
  *
  * <p>Benchmark method naming:
  * <ul>
  *   <li>no suffix - {@code compareToIgnoreCase} baseline</li>
  *   <li>{@code FC} - {@code compareToFoldCase}</li>
  *   <li>{@code EQ} - {@code equalsIgnoreCase} baseline</li>
  *   <li>{@code EQFC} - {@code equalsFoldCase}</li>
  * </ul>
  *
  * <p>Every benchmark returns the result of the call under test, so the
  * harness consumes it. The input fields are kept non-final, as in the
  * original; JMH guidance is to read inputs from non-final state fields so
  * that the JIT cannot constant-fold the measured call.
  */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Thread)
@Warmup(iterations = 5, time = 1)
@Measurement(iterations = 5, time = 1)
@Fork(3)
 public class StringCompareToFoldCase {

    // ---- ASCII-only inputs: same letters, differing only in case ----
    private String asciiUpper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    private String asciiUpperLower = "ABCDEFGHIJKLMNOpqrstuvwxyz";
    private String asciiLower = "abcdefghijklmnopqrstuvwxyz";

    // ---- Inputs containing U+00DF (sharp s), paired with a spelling that
    // mixes "ss" and sharp s. Presumably meant to exercise the one-to-many
    // folding of sharp s; only used by the FC benchmark.
    private String asciiWithDF = "abcdßßßßßßßßßßßßßßßßWXYZ";
    private String asciiWithDFSS = "abcdssssssssssssssssßßßßßßßßWXYZ";

    // ---- Mixed pair: an all-ASCII string against one whose last char is
    // U+0391, which is outside Latin-1. Presumably this pairs a compact
    // (Latin-1) string with a UTF-16 one - TODO confirm against String internals.
    private String asciiLatin1 = "ABCDEFGHIJKLMNOpqrstuvwxyz0";
    private String asciiLatin1UTF16 = "abcdefghijklmnopqrstuvwxyz\u0391";

    // ---- Greek (BMP, non-Latin-1) inputs ----
    private String greekUpper = "\u0391\u0392\u0393\u0394\u0395\u0391\u0392\u0393\u0394\u0395"; // ΑΒΓΔΕΑΒΓΔΕ
    private String greekUpperLower = "\u0391\u0392\u0393\u0394\u0395\u0391\u0392\u0393\u0394\u03B5"; // ΑΒΓΔΕΑΒΓΔε
    private String greekLower = "\u03B1\u03B2\u03B3\u03B4\u03B5\u03B1\u03B2\u03B3\u03B4\u03B5"; // αβγδεαβγδε

    // ---- Supplementary-plane inputs written as surrogate pairs:
    // U+10400..U+10404 (upper) and U+10428..U+1042C (lower).
    // NOTE(review): these three fields are public while the others are
    // private; visibility is left unchanged to keep the class's surface as is.
    public String supUpper = "\ud801\udc00\ud801\udc01\ud801\udc02\ud801\udc03\ud801\udc04";
    public String supUpperLower = "\ud801\udc00\ud801\udc01\ud801\udc02\ud801\udc03\ud801\udc2c";
    public String supLower = "\ud801\udc28\ud801\udc29\ud801\udc2a\ud801\udc2b\ud801\udc2c";

    // ======== compareToIgnoreCase baselines ========

    @Benchmark
    public int asciiUpperLower() {
        return asciiUpper.compareToIgnoreCase(asciiUpperLower);
    }

    @Benchmark
    public int asciiLower() {
        return asciiUpper.compareToIgnoreCase(asciiLower);
    }

    @Benchmark
    public int greekUpperLower() {
        return greekUpper.compareToIgnoreCase(greekUpperLower);
    }

    @Benchmark
    public int greekLower() {
        return greekUpper.compareToIgnoreCase(greekLower);
    }

    @Benchmark
    public int latin1UTF16() {
        return asciiLatin1.compareToIgnoreCase(asciiLatin1UTF16);
    }

    @Benchmark
    public int supUpperLower() {
        return supUpper.compareToIgnoreCase(supUpperLower);
    }

    @Benchmark
    public int supLower() {
        return supUpper.compareToIgnoreCase(supLower);
    }

    // ======== compareToFoldCase ========

    @Benchmark
    public int asciiUpperLowerFC() {
        return asciiUpper.compareToFoldCase(asciiUpperLower);
    }

    @Benchmark
    public int asciiLowerFC() {
        return asciiUpper.compareToFoldCase(asciiLower);
    }

    @Benchmark
    public int asciiWithDFFC() {
        return asciiWithDF.compareToFoldCase(asciiWithDFSS);
    }

    @Benchmark
    public int greekUpperLowerFC() {
        return greekUpper.compareToFoldCase(greekUpperLower);
    }

    @Benchmark
    public int greekLowerFC() {
        return greekUpper.compareToFoldCase(greekLower);
    }

    @Benchmark
    public int latin1UTF16FC() {
        return asciiLatin1.compareToFoldCase(asciiLatin1UTF16);
    }

    @Benchmark
    public int supUpperLowerFC() {
        return supUpper.compareToFoldCase(supUpperLower);
    }

    @Benchmark
    public int supLowerFC() {
        return supUpper.compareToFoldCase(supLower);
    }

    // ======== equalsIgnoreCase baselines ========

    @Benchmark
    public boolean asciiUpperLowerEQ() {
        return asciiUpper.equalsIgnoreCase(asciiUpperLower);
    }

    @Benchmark
    public boolean asciiLowerEQ() {
        return asciiUpper.equalsIgnoreCase(asciiLower);
    }

    @Benchmark
    public boolean greekUpperLowerEQ() {
        return greekUpper.equalsIgnoreCase(greekUpperLower);
    }

    @Benchmark
    public boolean greekLowerEQ() {
        return greekUpper.equalsIgnoreCase(greekLower);
    }

    @Benchmark
    public boolean latin1UTF16EQ() {
        return asciiLatin1.equalsIgnoreCase(asciiLatin1UTF16);
    }

    @Benchmark
    public boolean supUpperLowerEQ() {
        return supUpper.equalsIgnoreCase(supUpperLower);
    }

    @Benchmark
    public boolean supLowerEQ() {
        return supUpper.equalsIgnoreCase(supLower);
    }

    // ======== equalsFoldCase ========

    @Benchmark
    public boolean asciiUpperLowerEQFC() {
        return asciiUpper.equalsFoldCase(asciiUpperLower);
    }

    @Benchmark
    public boolean asciiLowerEQFC() {
        return asciiUpper.equalsFoldCase(asciiLower);
    }

    @Benchmark
    public boolean greekUpperLowerEQFC() {
        return greekUpper.equalsFoldCase(greekUpperLower);
    }

    @Benchmark
    public boolean greekLowerEQFC() {
        return greekUpper.equalsFoldCase(greekLower);
    }

    @Benchmark
    public boolean latin1UTF16EQFC() {
        return asciiLatin1.equalsFoldCase(asciiLatin1UTF16);
    }

    @Benchmark
    public boolean supUpperLowerEQFC() {
        return supUpper.equalsFoldCase(supUpperLower);
    }

    @Benchmark
    public boolean supLowerEQFC() {
        return supUpper.equalsFoldCase(supLower);
    }
 }