Add default implementation of codePointCount to CharSequence

2026-01-28 12:09:14 +00:00 · 2025-07-26 19:05:05 +09:00 · 2025-07-26 19:05:05 +09:00 · 4811c9df90
commit 4811c9df90
parent 1e7da59d46
1 changed files with 25 additions and 0 deletions
--- a/src/java.base/share/classes/java/lang/CharSequence.java
+++ b/src/java.base/share/classes/java/lang/CharSequence.java
@@ -253,6 +253,31 @@ public interface CharSequence {
                false);
    }

+    /**
+     * Returns the number of Unicode code points in this sequence. A high
+     * surrogate immediately followed by a low surrogate is counted as one code
+     * point; any other {@code char}, unpaired surrogates included, counts as one.
+     *
+     * @return the number of Unicode code points in this sequence
+     * @since 26
+     */
+    public default int codePointCount() {
+        final int total = length();
+        final int end = total - 1;
+        int pairs = 0;
+        int pos = 0;
+        // For an empty sequence end is -1, so the signed comparison skips the loop.
+        while (pos < end) {
+            final char lead = charAt(pos);
+            pos++;
+            if (Character.isHighSurrogate(lead) && Character.isLowSurrogate(charAt(pos))) {
+                pairs++;
+                pos++;
+            }
+        }
+        return total - pairs;
+    }
+
    /**
     * Compares two {@code CharSequence} instances lexicographically. Returns a
     * negative value, zero, or a positive value if the first sequence is lexicographically