diff --git a/src/java.base/share/classes/java/lang/CharSequence.java b/src/java.base/share/classes/java/lang/CharSequence.java index 2732ee35933..e66d50b4d6d 100644 --- a/src/java.base/share/classes/java/lang/CharSequence.java +++ b/src/java.base/share/classes/java/lang/CharSequence.java @@ -253,6 +253,31 @@ public interface CharSequence { false); } + /** + * Returns the number of Unicode code points in + * this sequence. Unpaired surrogates count + * as one code point each. + * + * @return the number of Unicode code points in this sequence + * @since 26 + */ + public default int codePointCount() { + final int length = length(); + int n = length; + final int lastIndex = length - 1; + + // i < lastIndex works properly even for an empty sequence + // thank to the fact that the length/index type in Java is signed + for (int i = 0; i < lastIndex;) { + if (Character.isHighSurrogate(charAt(i++)) && Character.isLowSurrogate(charAt(i))) { + n--; + i++; + } + } + + return n; + } + /** * Compares two {@code CharSequence} instances lexicographically. Returns a * negative value, zero, or a positive value if the first sequence is lexicographically