8367129: Move input validation checks to Java for java.lang.StringLatin1 intrinsics

Reviewed-by: rriggs, rgiulietti
This commit is contained in:
Volkan Yazici 2026-02-20 16:26:18 +00:00
parent e8dadf4baa
commit 72b28672ad
5 changed files with 567 additions and 114 deletions

View File

@ -257,6 +257,7 @@ class methodHandle;
do_intrinsic(_bitCount_i, java_lang_Integer, bitCount_name, int_int_signature, F_S) \
do_intrinsic(_bitCount_l, java_lang_Long, bitCount_name, long_int_signature, F_S) \
do_intrinsic(_compress_i, java_lang_Integer, compress_name, int2_int_signature, F_S) \
do_name( compress_name, "compress") \
do_intrinsic(_compress_l, java_lang_Long, compress_name, long2_long_signature, F_S) \
do_intrinsic(_expand_i, java_lang_Integer, expand_name, int2_int_signature, F_S) \
do_intrinsic(_expand_l, java_lang_Long, expand_name, long2_long_signature, F_S) \
@ -358,11 +359,11 @@ class methodHandle;
do_name( vectorizedHashCode_name, "vectorizedHashCode") \
do_signature(vectorizedHashCode_signature, "(Ljava/lang/Object;IIII)I") \
\
do_intrinsic(_compressStringC, java_lang_StringUTF16, compress_name, encodeISOArray_signature, F_S) \
do_name( compress_name, "compress") \
do_intrinsic(_compressStringB, java_lang_StringUTF16, compress_name, indexOfI_signature, F_S) \
do_intrinsic(_compressStringC, java_lang_StringUTF16, compressString_name, encodeISOArray_signature, F_S) \
do_name( compressString_name, "compress0") \
do_intrinsic(_compressStringB, java_lang_StringUTF16, compressString_name, indexOfI_signature, F_S) \
do_intrinsic(_inflateStringC, java_lang_StringLatin1, inflate_name, inflateC_signature, F_S) \
do_name( inflate_name, "inflate") \
do_name( inflate_name, "inflate0") \
do_signature(inflateC_signature, "([BI[CII)V") \
do_intrinsic(_inflateStringB, java_lang_StringLatin1, inflate_name, inflateB_signature, F_S) \
do_signature(inflateB_signature, "([BI[BII)V") \
@ -381,9 +382,9 @@ class methodHandle;
do_intrinsic(_compareToLU, java_lang_StringLatin1,compareToLU_name, compareTo_indexOf_signature, F_S) \
do_intrinsic(_compareToUL, java_lang_StringUTF16, compareToUL_name, compareTo_indexOf_signature, F_S) \
do_signature(compareTo_indexOf_signature, "([B[B)I") \
do_name( compareTo_name, "compareTo") \
do_name( compareToLU_name, "compareToUTF16") \
do_name( compareToUL_name, "compareToLatin1") \
do_name( compareTo_name, "compareTo0") \
do_name( compareToLU_name, "compareToUTF16_0") \
do_name( compareToUL_name, "compareToLatin1_0") \
do_intrinsic(_indexOfL, java_lang_StringLatin1,indexOf_name, compareTo_indexOf_signature, F_S) \
do_intrinsic(_indexOfU, java_lang_StringUTF16, indexOf_name, compareTo_indexOf_signature, F_S) \
do_intrinsic(_indexOfUL, java_lang_StringUTF16, indexOfUL_name, compareTo_indexOf_signature, F_S) \
@ -392,12 +393,13 @@ class methodHandle;
do_intrinsic(_indexOfIUL, java_lang_StringUTF16, indexOfUL_name, indexOfI_signature, F_S) \
do_intrinsic(_indexOfU_char, java_lang_StringUTF16, indexOfChar_name, indexOfChar_signature, F_S) \
do_intrinsic(_indexOfL_char, java_lang_StringLatin1,indexOfChar_name, indexOfChar_signature, F_S) \
do_name( indexOf_name, "indexOf") \
do_name( indexOfChar_name, "indexOfChar") \
do_name( indexOfUL_name, "indexOfLatin1") \
do_name( indexOf_name, "indexOf0") \
do_name( indexOfChar_name, "indexOfChar0") \
do_name( indexOfUL_name, "indexOfLatin1_0") \
do_signature(indexOfI_signature, "([BI[BII)I") \
do_signature(indexOfChar_signature, "([BIII)I") \
do_intrinsic(_equalsL, java_lang_StringLatin1,equals_name, equalsB_signature, F_S) \
do_intrinsic(_equalsL, java_lang_StringLatin1,equalsString_name, equalsB_signature, F_S) \
do_name( equalsString_name, "equals0") \
\
do_intrinsic(_isDigit, java_lang_CharacterDataLatin1, isDigit_name, int_bool_signature, F_R) \
do_name( isDigit_name, "isDigit") \

View File

@ -1307,8 +1307,8 @@ bool LibraryCallKit::inline_string_indexOfI(StrIntrinsicNode::ArgEnc ae) {
Node* tgt_start = array_element_address(tgt, intcon(0), T_BYTE);
// Range checks
generate_string_range_check(src, src_offset, src_count, ae != StrIntrinsicNode::LL);
generate_string_range_check(tgt, intcon(0), tgt_count, ae == StrIntrinsicNode::UU);
generate_string_range_check(src, src_offset, src_count, ae != StrIntrinsicNode::LL, true);
generate_string_range_check(tgt, intcon(0), tgt_count, ae == StrIntrinsicNode::UU, true);
if (stopped()) {
return true;
}
@ -1404,7 +1404,7 @@ bool LibraryCallKit::inline_string_indexOfChar(StrIntrinsicNode::ArgEnc ae) {
Node* src_count = _gvn.transform(new SubINode(max, from_index));
// Range checks
generate_string_range_check(src, src_offset, src_count, ae == StrIntrinsicNode::U);
generate_string_range_check(src, src_offset, src_count, ae == StrIntrinsicNode::U, true);
// Check for int_ch >= 0
Node* int_ch_cmp = _gvn.transform(new CmpINode(int_ch, intcon(0)));
@ -1448,11 +1448,11 @@ bool LibraryCallKit::inline_string_indexOfChar(StrIntrinsicNode::ArgEnc ae) {
}
//---------------------------inline_string_copy---------------------
// compressIt == true --> generate a compressed copy operation (compress char[]/byte[] to byte[])
// int StringUTF16.compress(char[] src, int srcOff, byte[] dst, int dstOff, int len)
// int StringUTF16.compress(byte[] src, int srcOff, byte[] dst, int dstOff, int len)
// int StringUTF16.compress0(char[] src, int srcOff, byte[] dst, int dstOff, int len)
// int StringUTF16.compress0(byte[] src, int srcOff, byte[] dst, int dstOff, int len)
// compressIt == false --> generate an inflated copy operation (inflate byte[] to char[]/byte[])
// void StringLatin1.inflate(byte[] src, int srcOff, char[] dst, int dstOff, int len)
// void StringLatin1.inflate(byte[] src, int srcOff, byte[] dst, int dstOff, int len)
// void StringLatin1.inflate0(byte[] src, int srcOff, char[] dst, int dstOff, int len)
// void StringLatin1.inflate0(byte[] src, int srcOff, byte[] dst, int dstOff, int len)
bool LibraryCallKit::inline_string_copy(bool compress) {
if (too_many_traps(Deoptimization::Reason_intrinsic)) {
return false;
@ -1495,8 +1495,8 @@ bool LibraryCallKit::inline_string_copy(bool compress) {
}
// Range checks
generate_string_range_check(src, src_offset, length, convert_src);
generate_string_range_check(dst, dst_offset, length, convert_dst);
generate_string_range_check(src, src_offset, length, convert_src, true);
generate_string_range_check(dst, dst_offset, length, convert_dst, true);
if (stopped()) {
return true;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -27,6 +27,7 @@ package java.lang;
import java.util.Arrays;
import java.util.Locale;
import java.util.Objects;
import java.util.Spliterator;
import java.util.function.Consumer;
import java.util.function.IntConsumer;
@ -39,12 +40,22 @@ import jdk.internal.vm.annotation.IntrinsicCandidate;
import static java.lang.String.LATIN1;
import static java.lang.String.UTF16;
import static java.lang.String.checkIndex;
import static java.lang.String.checkOffset;
/**
* Latin-1 string operations.
* <p>
* Unless stated otherwise, all methods assume that
* <ul>
* <li>{@code byte[]} arguments denote a Latin-1 string byte array
* <li>indices, offsets, and lengths (typically of type {@code int}) are in
* number of characters, i.e., the number of {@code byte}s/{@code char}s for
* Latin-1 strings, and the number of <a href="Character.html#unicode">Unicode
* code units</a> for UTF-16 strings
* </ul>
*/
final class StringLatin1 {
static char charAt(byte[] value, int index) {
checkIndex(index, value.length);
String.checkIndex(index, value.length);
return (char)(value[index] & 0xff);
}
@ -84,8 +95,23 @@ final class StringLatin1 {
System.arraycopy(value, srcBegin, dst, dstBegin, srcEnd - srcBegin);
}
@IntrinsicCandidate
/**
* {@return {@code true} if the provided byte arrays contain identical content; {@code false} otherwise}
*
* @param value a byte array
* @param other a byte array
*
* @throws NullPointerException if {@code value} or {@code other} is null
*/
static boolean equals(byte[] value, byte[] other) {
// Reject null arrays explicitly before delegating the comparison to equals0
Objects.requireNonNull(value);
Objects.requireNonNull(other);
return equals0(value, other);
}
// vmIntrinsics::_equalsL
@IntrinsicCandidate
private static boolean equals0(byte[] value, byte[] other) {
if (value.length == other.length) {
for (int i = 0; i < value.length; i++) {
if (value[i] != other[i]) {
@ -97,33 +123,102 @@ final class StringLatin1 {
return false;
}
@IntrinsicCandidate
/**
* Lexicographically compares two Latin-1 strings as specified in
* {@link String#compareTo(String) String::compareTo}.
*
* @param value a Latin-1 string byte array
* @param other a Latin-1 string byte array
*
* @return {@code 0} if {@code value} is equal to {@code other}, a value
* less than {@code 0} if {@code value} is lexicographically less than
* {@code other}; a value greater than {@code 0} otherwise.
*
* @throws NullPointerException if {@code value} or {@code other} is null
*/
static int compareTo(byte[] value, byte[] other) {
int len1 = value.length;
int len2 = other.length;
return compareTo(value, other, len1, len2);
Objects.requireNonNull(value);
Objects.requireNonNull(other);
return compareTo0(value, other);
}
// vmIntrinsics::_compareToL
// Compares both arrays over their full lengths by delegating to
// compareTo(byte[], byte[], int, int) with `value.length` and `other.length`.
@IntrinsicCandidate
private static int compareTo0(byte[] value, byte[] other) {
return compareTo(value, other, value.length, other.length);
}
/**
 * Compares the leading {@code len1} characters of one Latin-1 string with
 * the leading {@code len2} characters of another, using the lexicographic
 * ordering of {@link String#compareTo(String) String::compareTo}.
 *
 * @param value a Latin-1 string byte array
 * @param other a Latin-1 string byte array
 * @param len1 the number of characters in {@code value} to compare
 * @param len2 the number of characters in {@code other} to compare
 *
 * @return {@code 0} if both prefixes are equal, a value less than
 * {@code 0} if the {@code value} prefix is lexicographically less than the
 * {@code other} prefix; a value greater than {@code 0} otherwise.
 *
 * @throws NullPointerException if {@code value} or {@code other} is null
 * @throws StringIndexOutOfBoundsException if the sub-ranges are out of bounds
 */
static int compareTo(byte[] value, byte[] other, int len1, int len2) {
    Objects.requireNonNull(value);
    Objects.requireNonNull(other);
    String.checkOffset(len1, length(value));
    String.checkOffset(len2, length(other));
    final int mismatchAt = ArraysSupport.mismatch(value, other, Math.min(len1, len2));
    if (mismatchAt < 0) {
        // No difference within the shared range: the prefix lengths decide
        return len1 - len2;
    }
    return getChar(value, mismatchAt) - getChar(other, mismatchAt);
}
@IntrinsicCandidate
/**
* Lexicographically compares a Latin-1 string to a UTF-16 string as
* specified in {@link String#compareTo(String) String::compareTo}.
*
* @param value a Latin-1 string byte array
* @param other a UTF-16 string byte array
*
* @return {@code 0} if {@code value} is equal to {@code other}, a value
* less than {@code 0} if {@code value} is lexicographically less than
* {@code other}; a value greater than {@code 0} otherwise.
*
* @throws NullPointerException if {@code value} or {@code other} is null
*/
static int compareToUTF16(byte[] value, byte[] other) {
// Reject null arrays explicitly before delegating to the intrinsic candidate
Objects.requireNonNull(value);
Objects.requireNonNull(other);
return compareToUTF16_0(value, other);
}
// vmIntrinsics::_compareToLU
// Compares a Latin-1 array with a UTF-16 array over their full lengths, each
// length being obtained from the respective class's length(byte[]) helper.
@IntrinsicCandidate
private static int compareToUTF16_0(byte[] value, byte[] other) {
    return compareToUTF16Values(value, other, length(value), StringUTF16.length(other));
}
/*
* Checks the boundary and then compares the byte arrays.
/**
* Lexicographically compares a Latin-1 string prefix to a UTF-16 one as
* specified in {@link String#compareTo(String) String::compareTo}.
*
* @param value a Latin-1 string byte array
* @param other a UTF-16 string byte array
* @param len1 the number of characters in {@code value} to compare
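* @param len2 the number of characters in {@code other} to compare
*
* @return {@code 0} if the {@code value} prefix is equal to the
* {@code other} prefix, a value less than {@code 0} if the {@code value}
* prefix is lexicographically less than the {@code other} prefix; a
* value greater than {@code 0} otherwise.
*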
* @throws NullPointerException if {@code value} or {@code other} is null
* @throws StringIndexOutOfBoundsException if the sub-ranges are out of bounds
*/
static int compareToUTF16(byte[] value, byte[] other, int len1, int len2) {
checkOffset(len1, length(value));
checkOffset(len2, StringUTF16.length(other));
Objects.requireNonNull(value);
Objects.requireNonNull(other);
String.checkOffset(len1, length(value));
String.checkOffset(len2, StringUTF16.length(other));
return compareToUTF16Values(value, other, len1, len2);
}
@ -139,9 +234,12 @@ final class StringLatin1 {
return len1 - len2;
}
/**
* Case-insensitive {@link #compareTo(byte[], byte[]) compareTo}.
*/
static int compareToCI(byte[] value, byte[] other) {
int len1 = value.length;
int len2 = other.length;
int len1 = value.length; // Implicit null check on `value`
int len2 = other.length; // Implicit null check on `other`
int lim = Math.min(len1, len2);
for (int k = 0; k < lim; k++) {
if (value[k] != other[k]) {
@ -159,7 +257,12 @@ final class StringLatin1 {
return len1 - len2;
}
/**
* Case-insensitive {@link #compareToUTF16(byte[], byte[]) compareToUTF16}.
*/
static int compareToCI_UTF16(byte[] value, byte[] other) {
Objects.requireNonNull(value);
Objects.requireNonNull(other);
int len1 = length(value);
int len2 = StringUTF16.length(other);
int lim = Math.min(len1, len2);
@ -307,16 +410,34 @@ final class StringLatin1 {
return ArraysSupport.hashCodeOfUnsigned(value, 0, value.length, 0);
}
// Caller must ensure that from- and toIndex are within bounds
/**
* Finds the index of the first character matching the provided one in the
* given Latin-1 string byte array sub-range. {@code -1} is returned if the
* provided character cannot be encoded in Latin-1, or cannot be found in
* the target string sub-range.
*
* @param value a Latin-1 string byte array to search in
* @param ch a character to search for
* @param fromIndex the index (inclusive) of the first character in the sub-range
* @param toIndex the index (exclusive) of the last character in the sub-range
*
* @return the index of the first character matching the provided one in the
* given target string sub-range; {@code -1} otherwise
*
* @throws NullPointerException if {@code value} is null
* @throws StringIndexOutOfBoundsException if the sub-range is out of bounds
*/
static int indexOf(byte[] value, int ch, int fromIndex, int toIndex) {
String.checkBoundsBeginEnd(fromIndex, toIndex, value.length); // Implicit null check on `value`
if (!canEncode(ch)) {
return -1;
}
return indexOfChar(value, ch, fromIndex, toIndex);
return indexOfChar0(value, ch, fromIndex, toIndex);
}
// vmIntrinsics::_indexOfL_char
@IntrinsicCandidate
private static int indexOfChar(byte[] value, int ch, int fromIndex, int max) {
private static int indexOfChar0(byte[] value, int ch, int fromIndex, int max) {
byte c = (byte)ch;
for (int i = fromIndex; i < max; i++) {
if (value[i] == c) {
@ -326,22 +447,67 @@ final class StringLatin1 {
return -1;
}
@IntrinsicCandidate
/**
* Searches for the first occurrence of {@code str} in {@code value}, and,
* if found, returns the index of the first character of the matching
* {@code value} sub-range; {@code -1} otherwise.
*
* @param value a Latin-1 string byte array to search in
* @param str a Latin-1 string byte array to search for
*
* @return the index of the first character of the matching {@code value}
* sub-range if a match is found; {@code -1} otherwise
*
* @throws NullPointerException if {@code value} or {@code str} is null
*/
static int indexOf(byte[] value, byte[] str) {
if (str.length == 0) {
return 0;
}
if (value.length == 0) {
return -1;
}
return indexOf(value, value.length, str, str.length, 0);
Objects.requireNonNull(value);
Objects.requireNonNull(str);
return indexOf0(value, str);
}
// vmIntrinsics::_indexOfL
@IntrinsicCandidate
static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) {
private static int indexOf0(byte[] value, byte[] str) {
// Search for all of `str` within all of `value`, starting at index 0
return indexOf0(value, value.length, str, str.length, 0);
}
/**
* Searches for the first occurrence of the given {@code str} sub-range in
* the given {@code value} sub-range, and, if found, returns the index of
* the first character of the matching {@code value} sub-range; {@code -1}
* otherwise.
*
* @param value a Latin-1 string byte array to search in
* @param valueToIndex the end index (exclusive) of the {@code value} sub-range to search in
* @param str a Latin-1 string byte array to search for
* @param strToIndex the end index (exclusive) of the {@code str} sub-range
* to search for; that sub-range always starts at index {@code 0}
* @param valueFromIndex the start index (inclusive) of the {@code value} sub-range to search in
*
* @return the index of the first character of the matching {@code value}
* sub-range if a match is found; {@code -1} otherwise
*
* @throws NullPointerException if {@code value} or {@code str} is null
* @throws StringIndexOutOfBoundsException if the sub-ranges are out of bounds
*/
static int indexOf(byte[] value, int valueToIndex, byte[] str, int strToIndex, int valueFromIndex) {
String.checkBoundsBeginEnd(valueFromIndex, valueToIndex, value.length); // Implicit null check on `value`
String.checkBoundsBeginEnd(0, strToIndex, str.length); // Implicit null check on `str`
return indexOf0(value, valueToIndex, str, strToIndex, valueFromIndex);
}
// vmIntrinsics::_indexOfIL
@IntrinsicCandidate
private static int indexOf0(byte[] value, int valueToIndex, byte[] str, int strToIndex, int valueFromIndex) {
if (strToIndex == 0) {
return 0;
}
if ((valueToIndex - valueFromIndex) < strToIndex) {
return -1;
}
byte first = str[0];
int max = (valueCount - strCount);
for (int i = fromIndex; i <= max; i++) {
int max = (valueToIndex - strToIndex);
for (int i = valueFromIndex; i <= max; i++) {
// Look for first character.
if (value[i] != first) {
while (++i <= max && value[i] != first);
@ -349,7 +515,7 @@ final class StringLatin1 {
// Found first character, now look at the rest of value
if (i <= max) {
int j = i + 1;
int end = j + strCount - 1;
int end = j + strToIndex - 1;
for (int k = 1; j < end && value[j] == str[k]; j++, k++);
if (j == end) {
// Found whole string.
@ -855,18 +1021,65 @@ final class StringLatin1 {
LATIN1);
}
// inflatedCopy byte[] -> char[]
@IntrinsicCandidate
/**
* Copies characters from a Latin-1 string byte array sub-range to the
* given {@code char} array sub-range.
* <p>
* This effectively <em>inflates</em> the content from a 1 byte per
* character representation to a 2 byte one.
*
* @param src the source Latin-1 string byte array
* @param srcOff the index (inclusive) of the first character in {@code src}
* @param dst the target {@code char} array
* @param dstOff the index (inclusive) of the first character in {@code dst}
* @param len the number of characters to copy
*
* @throws NullPointerException if {@code src} or {@code dst} is null
* @throws StringIndexOutOfBoundsException if the sub-ranges are out of bounds
*/
static void inflate(byte[] src, int srcOff, char[] dst, int dstOff, int len) {
// Both sub-ranges are validated here, before delegating to the intrinsic candidate
String.checkBoundsOffCount(srcOff, len, src.length); // Implicit null check on `src`
String.checkBoundsOffCount(dstOff, len, dst.length); // Implicit null check on `dst`
inflate0(src, srcOff, dst, dstOff, len);
}
// vmIntrinsics::_inflateStringC
// Copies `len` Latin-1 bytes from `src` (starting at `srcOff`) into `dst`
// (starting at `dstOff`), widening each byte to a char.
@IntrinsicCandidate
private static void inflate0(byte[] src, int srcOff, char[] dst, int dstOff, int len) {
    int srcIndex = srcOff;
    int dstIndex = dstOff;
    for (int remaining = len; remaining > 0; remaining--) {
        // Mask so the signed byte is read as an unsigned value in [0, 255]
        dst[dstIndex++] = (char) (src[srcIndex++] & 0xff);
    }
}
// inflatedCopy byte[] -> byte[]
@IntrinsicCandidate
/**
* Copies characters from a Latin-1 string byte array sub-range to a UTF-16
* one.
* <p>
* This effectively <em>inflates</em> the content from a 1 byte per
* character representation to a 2 byte one.
*
* @param src the source Latin-1 string byte array
* @param srcOff the index (inclusive) of the first character in {@code src}
* @param dst the target UTF-16 string byte array
* @param dstOff the index (inclusive) of the first character in {@code dst}
* @param len the maximum number of characters to copy
*
* @throws NullPointerException if {@code src} or {@code dst} is null
* @throws StringIndexOutOfBoundsException if the sub-ranges are out of bounds
*/
static void inflate(byte[] src, int srcOff, byte[] dst, int dstOff, int len) {
StringUTF16.inflate(src, srcOff, dst, dstOff, len);
String.checkBoundsOffCount(srcOff, len, src.length); // Implicit null check on `src`
Objects.requireNonNull(dst);
String.checkBoundsOffCount(dstOff, len, StringUTF16.length(dst));
inflate0(src, srcOff, dst, dstOff, len);
}
// vmIntrinsics::_inflateStringB
// Copies `len` Latin-1 bytes from `src` (starting at `srcOff`) into `dst`
// (starting at character index `dstOff`) via StringUTF16.putChar.
@IntrinsicCandidate
private static void inflate0(byte[] src, int srcOff, byte[] dst, int dstOff, int len) {
    int srcIndex = srcOff;
    int dstIndex = dstOff;
    for (int remaining = len; remaining > 0; remaining--) {
        // Mask so the signed byte is read as an unsigned value in [0, 255]
        StringUTF16.putChar(dst, dstIndex++, src[srcIndex++] & 0xff);
    }
}
static class CharsSpliterator implements Spliterator.OfInt {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2025, Alibaba Group Holding Limited. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -28,6 +28,7 @@ package java.lang;
import java.util.Arrays;
import java.util.Locale;
import java.util.Objects;
import java.util.Spliterator;
import java.util.function.Consumer;
import java.util.function.IntConsumer;
@ -49,8 +50,13 @@ import static java.lang.String.UTF16;
/// it has the same endianness as a char, which is the platform endianness.
/// This is ensured in the static initializer of StringUTF16.
///
/// All indices and sizes for byte arrays carrying UTF16 data are in number of
/// chars instead of number of bytes.
/// Unless stated otherwise, all methods assume that
///
/// - `byte[]` arguments denote a UTF-16 string byte array
/// - indices, offsets, and lengths (typically of type `int`) are in number of
/// characters, i.e., the number of
/// [Unicode code units](Character.html#unicode) for UTF-16 strings, and the
/// number of `byte`s/`char`s for Latin-1 strings
final class StringUTF16 {
// Return a new byte array for a UTF16-coded string for len chars
@ -388,9 +394,33 @@ final class StringUTF16 {
return n;
}
// compressedCopy char[] -> byte[]
@IntrinsicCandidate
/**
* Copies the prefix of Latin-1 characters from a {@code char} array
* sub-range to a Latin-1 string byte array sub-range.
* <p>
* This effectively <em>compresses</em> the content from a 2 byte per
* character representation to a 1 byte one.
*
* @param src the source {@code char} array
* @param srcOff the index (inclusive) of the first character in {@code src}
* @param dst the target Latin-1 string byte array
* @param dstOff the index (inclusive) of the first character in {@code dst}
* @param len the maximum number of characters to copy
*
* @return the number of characters copied
*
* @throws NullPointerException if {@code src} or {@code dst} is null
* @throws StringIndexOutOfBoundsException if the sub-ranges are out of bounds
*/
static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
String.checkBoundsOffCount(srcOff, len, src.length); // Implicit null check on `src`
String.checkBoundsOffCount(dstOff, len, dst.length); // Implicit null check on `dst`
return compress0(src, srcOff, dst, dstOff, len);
}
// vmIntrinsics::_compressStringC
@IntrinsicCandidate
private static int compress0(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
for (int i = 0; i < len; i++) {
char c = src[srcOff];
if (c > 0xff) {
@ -403,11 +433,34 @@ final class StringUTF16 {
return len;
}
// compressedCopy byte[] -> byte[]
@IntrinsicCandidate
/**
* Copies the prefix of Latin-1 characters from a UTF-16 string byte array
* sub-range to a Latin-1 one.
* <p>
* This effectively <em>compresses</em> the content from a 2 byte per
* character representation to a 1 byte one.
*
* @param src the source UTF-16 string byte array
* @param srcOff the index (inclusive) of the first character in {@code src}
* @param dst the target Latin-1 string byte array
* @param dstOff the index (inclusive) of the first character in {@code dst}
* @param len the maximum number of characters to copy
*
* @return the number of characters copied
*
* @throws NullPointerException if {@code src} or {@code dst} is null
* @throws StringIndexOutOfBoundsException if the sub-ranges are out of bounds
*/
static int compress(byte[] src, int srcOff, byte[] dst, int dstOff, int len) {
// We need a range check here because 'getChar' has no checks
checkBoundsOffCount(srcOff, len, src);
Objects.requireNonNull(src);
String.checkBoundsOffCount(srcOff, len, length(src));
String.checkBoundsOffCount(dstOff, len, dst.length); // Implicit null check on `dst`
return compress0(src, srcOff, dst, dstOff, len);
}
// vmIntrinsics::_compressStringB
@IntrinsicCandidate
private static int compress0(byte[] src, int srcOff, byte[] dst, int dstOff, int len) {
for (int i = 0; i < len; i++) {
char c = getChar(src, srcOff);
if (c > 0xff) {
@ -446,7 +499,7 @@ final class StringUTF16 {
static void getChars(byte[] value, int srcBegin, int srcEnd, char[] dst, int dstBegin) {
// We need a range check here because 'getChar' has no checks
if (srcBegin < srcEnd) {
checkBoundsOffCount(srcBegin, srcEnd - srcBegin, value);
String.checkBoundsOffCount(srcBegin, srcEnd - srcBegin, length(value));
}
for (int i = srcBegin; i < srcEnd; i++) {
dst[dstBegin++] = getChar(value, i);
@ -462,20 +515,50 @@ final class StringUTF16 {
}
}
@IntrinsicCandidate
/**
* Lexicographically compares two UTF-16 strings as specified in
* {@link String#compareTo(String) String::compareTo}.
*
* @param value a UTF-16 string byte array
* @param other a UTF-16 string byte array
*
* @return {@code 0} if {@code value} is equal to {@code other}, a value
* less than {@code 0} if {@code value} is lexicographically less than
* {@code other}; a value greater than {@code 0} otherwise.
*
* @throws NullPointerException if {@code value} or {@code other} is null
*/
static int compareTo(byte[] value, byte[] other) {
// Reject null arrays explicitly before delegating to the intrinsic candidate
Objects.requireNonNull(value);
Objects.requireNonNull(other);
return compareTo0(value, other);
}
// vmIntrinsics::_compareToU
// Compares two UTF-16 arrays over their full lengths, as reported by
// length(byte[]), by delegating to compareValues.
@IntrinsicCandidate
private static int compareTo0(byte[] value, byte[] other) {
    return compareValues(value, other, length(value), length(other));
}
/*
* Checks the boundary and then compares the byte arrays.
/**
* Lexicographically compares two UTF-16 string prefixes as specified in
* {@link String#compareTo(String) String::compareTo}.
*
* @param value a UTF-16 string byte array
* @param other a UTF-16 string byte array
* @param len1 the number of characters in {@code value} to compare
* @param len2 the number of characters in {@code other} to compare
*
* @return {@code 0} if the {@code value} prefix is equal to the
* {@code other} prefix, a value less than {@code 0} if the {@code value}
* prefix is lexicographically less than the {@code other} prefix; a
* value greater than {@code 0} otherwise.
*
* @throws NullPointerException if {@code value} or {@code other} is null
* @throws StringIndexOutOfBoundsException if the sub-ranges are out of bounds
*/
static int compareTo(byte[] value, byte[] other, int len1, int len2) {
Objects.requireNonNull(value);
Objects.requireNonNull(other);
// Validate both prefix lengths before handing off to compareValues
checkOffset(len1, value);
checkOffset(len2, other);
return compareValues(value, other, len1, len2);
}
@ -491,16 +574,58 @@ final class StringUTF16 {
return len1 - len2;
}
@IntrinsicCandidate
/**
* Lexicographically compares a UTF-16 string to a Latin-1 one as specified
* in {@link String#compareTo(String) String::compareTo}.
*
* @param value a UTF-16 string byte array
* @param other a Latin-1 string byte array
*
* @return {@code 0} if {@code value} is equal to {@code other}, a value
* less than {@code 0} if {@code value} is lexicographically less than
* {@code other}; a value greater than {@code 0} otherwise.
*
* @throws NullPointerException if {@code value} or {@code other} is null
*/
static int compareToLatin1(byte[] value, byte[] other) {
// Reject null arrays explicitly before delegating to the intrinsic candidate
Objects.requireNonNull(value);
Objects.requireNonNull(other);
return compareToLatin1_0(value, other);
}
// vmIntrinsics::_compareToUL
// Reuses the Latin-1-vs-UTF-16 comparison with the operands swapped, and
// negates its result to restore the (UTF-16, Latin-1) ordering.
@IntrinsicCandidate
private static int compareToLatin1_0(byte[] value, byte[] other) {
return -StringLatin1.compareToUTF16(other, value);
}
/**
* Lexicographically compares a UTF-16 string prefix to a Latin-1 one as
* specified in {@link String#compareTo(String) String::compareTo}.
*
* @param value a UTF-16 string byte array
* @param other a Latin-1 string byte array
* @param len1 the number of characters from {@code value} to compare
* @param len2 the number of characters from {@code other} to compare
*
* @return {@code 0} if the {@code value} prefix is equal to the
* {@code other} prefix, a value less than {@code 0} if the {@code value}
* prefix is lexicographically less than the {@code other} prefix; a
* value greater than {@code 0} otherwise.
*
* @throws NullPointerException if {@code value} or {@code other} is null
* @throws StringIndexOutOfBoundsException if the sub-ranges are out of bounds
*/
static int compareToLatin1(byte[] value, byte[] other, int len1, int len2) {
// Operands and lengths are swapped to reuse the Latin-1-vs-UTF-16
// comparison, hence the negation. No checks are performed here; the
// null and bounds checks are those of the delegate.
return -StringLatin1.compareToUTF16(other, value, len2, len1);
}
/**
* Case-insensitive {@link #compareTo(byte[], byte[]) compareTo}.
*/
static int compareToCI(byte[] value, byte[] other) {
// Reject null arrays explicitly, then delegate with offset 0 and
// length(...) for each array
Objects.requireNonNull(value);
Objects.requireNonNull(other);
return compareToCIImpl(value, 0, length(value), other, 0, length(other));
}
@ -589,6 +714,9 @@ final class StringUTF16 {
return cp;
}
/**
* Case-insensitive {@link #compareToLatin1(byte[], byte[]) compareToLatin1}.
*/
static int compareToCI_Latin1(byte[] value, byte[] other) {
// Reuses the Latin-1-vs-UTF-16 case-insensitive comparison with the
// operands swapped, and negates its result to restore the ordering
return -StringLatin1.compareToCI_UTF16(other, value);
}
@ -668,19 +796,52 @@ final class StringUTF16 {
return ArraysSupport.hashCodeOfUTF16(value, 0, value.length >> 1, 0);
}
// Caller must ensure that from- and toIndex are within bounds
/**
* {@return the index of the first character matching the provided one in
* the given UTF-16 string byte array sub-range; {@code -1} otherwise}
*
* @param value a UTF-16 string byte array to search in
* @param ch a character to search for
* @param fromIndex the index (inclusive) of the first character in the sub-range
* @param toIndex the index (exclusive) of the last character in the sub-range
*
* @throws NullPointerException if {@code value} is null
* @throws StringIndexOutOfBoundsException if the sub-range is out of bounds
*/
static int indexOf(byte[] value, int ch, int fromIndex, int toIndex) {
Objects.requireNonNull(value);
checkBoundsBeginEnd(fromIndex, toIndex, value);
if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
// handle most cases here (ch is a BMP code point or a
// negative value (invalid code point))
return indexOfChar(value, ch, fromIndex, toIndex);
return indexOfChar0(value, ch, fromIndex, toIndex);
} else {
return indexOfSupplementary(value, ch, fromIndex, toIndex);
}
}
@IntrinsicCandidate
/**
* Searches for the first occurrence of {@code str} in {@code value}, and,
* if found, returns the index of the first character of the matching
* {@code value} sub-range; {@code -1} otherwise.
*
* @param value a UTF-16 string byte array to search in
* @param str a UTF-16 string byte array to search for
*
* @return the index of the first character of the matching {@code value}
* sub-range if a match is found; {@code -1} otherwise
*
* @throws NullPointerException if {@code value} or {@code str} is null
*/
static int indexOf(byte[] value, byte[] str) {
// Reject null arrays explicitly before delegating to the intrinsic candidate
Objects.requireNonNull(value);
Objects.requireNonNull(str);
return indexOf0(value, str);
}
// vmIntrinsics::_indexOfU
@IntrinsicCandidate
private static int indexOf0(byte[] value, byte[] str) {
if (str.length == 0) {
return 0;
}
@ -690,22 +851,58 @@ final class StringUTF16 {
return indexOfUnsafe(value, length(value), str, length(str), 0);
}
@IntrinsicCandidate
static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) {
checkBoundsBeginEnd(fromIndex, valueCount, value);
checkBoundsBeginEnd(0, strCount, str);
return indexOfUnsafe(value, valueCount, str, strCount, fromIndex);
/**
* Searches for the first occurrence of the given {@code str} sub-range in
* the given {@code value} sub-range, and, if found, returns the index of
* the first character of the matching {@code value} sub-range; {@code -1}
* otherwise.
*
* @param value a UTF-16 string byte array to search in
* @param valueToIndex the index (exclusive) of the last character in {@code value}
* @param str a UTF-16 string byte array to search for
* @param strToIndex the index (exclusive) of the last character in {@code str}
* @param valueFromIndex the index (inclusive) of the first character in {@code value}
*
* @return the index of the first character of the matching {@code value}
* sub-range if a match is found; {@code -1} otherwise
*
* @throws NullPointerException if {@code value} or {@code str} is null
* @throws StringIndexOutOfBoundsException if the sub-ranges are out of bounds
*/
static int indexOf(byte[] value, int valueToIndex, byte[] str, int strToIndex, int valueFromIndex) {
Objects.requireNonNull(value);
checkBoundsBeginEnd(valueFromIndex, valueToIndex, value);
Objects.requireNonNull(str);
checkBoundsBeginEnd(0, strToIndex, str);
return indexOf0(value, valueToIndex, str, strToIndex, valueFromIndex);
}
// vmIntrinsics::_indexOfIU
//
// Performs no null or bounds checks itself; the trivial outcomes are
// resolved here so that indexOfUnsafe only ever sees a non-empty `str`
// sub-range that fits into the `value` sub-range.
@IntrinsicCandidate
private static int indexOf0(byte[] value, int valueToIndex, byte[] str, int strToIndex, int valueFromIndex) {
    // An empty `str` sub-range matches immediately.
    final boolean nothingToFind = (strToIndex == 0);
    if (nothingToFind) {
        return 0;
    }
    // A `value` sub-range shorter than `str` can never contain it.
    final int searchableLength = valueToIndex - valueFromIndex;
    if (searchableLength >= strToIndex) {
        return indexOfUnsafe(value, valueToIndex, str, strToIndex, valueFromIndex);
    }
    return -1;
}
private static int indexOfUnsafe(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) {
assert fromIndex >= 0;
assert strCount > 0;
assert strCount <= length(str);
assert valueCount >= strCount;
// This method has the following assumptions on its inputs:
//
// - Arrays are not null
// - Sub-ranges are valid
// - The `str` sub-range is not empty
// - The `value` sub-range length is greater than or equal to the `str` sub-range length
private static int indexOfUnsafe(byte[] value, int valueToIndex, byte[] str, int strToIndex, int valueFromIndex) {
assert valueFromIndex >= 0;
assert strToIndex > 0;
assert strToIndex <= length(str);
assert (valueToIndex - valueFromIndex) >= strToIndex;
char first = getChar(str, 0);
int max = (valueCount - strCount);
for (int i = fromIndex; i <= max; i++) {
int max = (valueToIndex - strToIndex);
for (int i = valueFromIndex; i <= max; i++) {
// Look for first character.
if (getChar(value, i) != first) {
while (++i <= max && getChar(value, i) != first);
@ -713,7 +910,7 @@ final class StringUTF16 {
// Found first character, now look at the rest of value
if (i <= max) {
int j = i + 1;
int end = j + strCount - 1;
int end = j + strToIndex - 1;
for (int k = 1; j < end && getChar(value, j) == getChar(str, k); j++, k++);
if (j == end) {
// Found whole string.
@ -724,12 +921,29 @@ final class StringUTF16 {
return -1;
}
/**
 * Searches for the first occurrence of the given Latin-1 string byte array
 * {@code str} in the given UTF-16 string byte array {@code value}, and, if
 * found, returns the index of the first character of the matching
 * {@code value} sub-range; {@code -1} otherwise.
 *
 * @param value a UTF-16 string byte array to search in
 * @param str a Latin-1 string byte array to search for
 *
 * @return the index of the first character of the matching {@code value}
 * sub-range if a match is found; {@code -1} otherwise
 *
 * @throws NullPointerException if {@code value} or {@code str} is null
 */
static int indexOfLatin1(byte[] value, byte[] str) {
    // This wrapper only performs the null checks; the search itself is
    // delegated to indexOfLatin1_0, which is the method carrying the
    // @IntrinsicCandidate annotation.
    Objects.requireNonNull(value);
    Objects.requireNonNull(str);
    return indexOfLatin1_0(value, str);
}
// vmIntrinsics::_indexOfUL
@IntrinsicCandidate
private static int indexOfLatin1_0(byte[] value, byte[] str) {
if (str.length == 0) {
return 0;
}
@ -739,18 +953,54 @@ final class StringUTF16 {
return indexOfLatin1Unsafe(value, length(value), str, str.length, 0);
}
@IntrinsicCandidate
static int indexOfLatin1(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) {
checkBoundsBeginEnd(fromIndex, srcCount, src);
String.checkBoundsBeginEnd(0, tgtCount, tgt.length);
return indexOfLatin1Unsafe(src, srcCount, tgt, tgtCount, fromIndex);
/**
* Searches for the first occurrence of the given Latin-1 string byte array
* {@code tgt} sub-range in the given UTF-16 string byte array {@code src}
* sub-range, and, if found, returns the index of the first character in
* {@code src}; {@code -1} otherwise.
*
* @param src a UTF-16 string byte array to search in
* @param srcToIndex the index (exclusive) of the last character in {@code src}
* @param tgt a Latin-1 string byte array to search for
* @param tgtToIndex the index (exclusive) of the last character in {@code tgt}
* @param tgtFromIndex the index (inclusive) of the first character in {@code src}
*
* @return the index of the first character of the matching {@code src}
* sub-range if a match is found; {@code -1} otherwise
*
* @throws NullPointerException if {@code src} or {@code tgt} is null
* @throws StringIndexOutOfBoundsException if the sub-ranges are out of bounds
*/
static int indexOfLatin1(byte[] src, int srcToIndex, byte[] tgt, int tgtToIndex, int tgtFromIndex) {
Objects.requireNonNull(src);
checkBoundsBeginEnd(tgtFromIndex, srcToIndex, src);
String.checkBoundsBeginEnd(0, tgtToIndex, tgt.length);
return indexOfLatin1_0(src, srcToIndex, tgt, tgtToIndex, tgtFromIndex);
}
static int indexOfLatin1Unsafe(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) {
// vmIntrinsics::_indexOfIUL
//
// Performs no null or bounds checks itself; the trivial outcomes are
// resolved here so that indexOfLatin1Unsafe only ever sees a non-empty
// `tgt` sub-range that fits into the `src` sub-range.
@IntrinsicCandidate
private static int indexOfLatin1_0(byte[] src, int srcToIndex, byte[] tgt, int tgtToIndex, int srcFromIndex) {
    // An empty `tgt` sub-range matches immediately.
    final boolean nothingToFind = (tgtToIndex == 0);
    if (nothingToFind) {
        return 0;
    }
    // A `src` sub-range shorter than `tgt` can never contain it.
    final int searchableLength = srcToIndex - srcFromIndex;
    if (searchableLength >= tgtToIndex) {
        return indexOfLatin1Unsafe(src, srcToIndex, tgt, tgtToIndex, srcFromIndex);
    }
    return -1;
}
// This method has the following assumptions on its inputs:
//
// - Arrays are not null
// - Sub-ranges are valid
// - The `tgt` sub-range is not empty
// - The `src` sub-range length is greater than or equal to the `tgt` sub-range length
private static int indexOfLatin1Unsafe(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) {
assert fromIndex >= 0;
assert tgtCount > 0;
assert tgtCount <= tgt.length;
assert srcCount >= tgtCount;
assert (srcCount - fromIndex) >= tgtCount;
char first = (char)(tgt[0] & 0xff);
int max = (srcCount - tgtCount);
for (int i = fromIndex; i <= max; i++) {
@ -774,8 +1024,9 @@ final class StringUTF16 {
return -1;
}
// vmIntrinsics::_indexOfU_char
@IntrinsicCandidate
private static int indexOfChar(byte[] value, int ch, int fromIndex, int max) {
private static int indexOfChar0(byte[] value, int ch, int fromIndex, int max) {
for (int i = fromIndex; i < max; i++) {
if (getChar(value, i) == ch) {
return i;
@ -1590,7 +1841,7 @@ final class StringUTF16 {
}
static boolean contentEquals(byte[] v1, byte[] v2, int len) {
checkBoundsOffCount(0, len, v2);
String.checkBoundsOffCount(0, len, length(v2));
for (int i = 0; i < len; i++) {
if ((char)(v1[i] & 0xff) != getChar(v2, i)) {
return false;
@ -1667,15 +1918,6 @@ final class StringUTF16 {
}
}
// inflatedCopy byte[] -> byte[]
//
// Reads `len` bytes from `src` starting at `srcOff`, zero-extends each one
// and stores it through putChar at successive indices starting at `dstOff`.
static void inflate(byte[] src, int srcOff, byte[] dst, int dstOff, int len) {
    // 'putChar' has no checks of its own, so the destination range has to
    // be validated up front.
    checkBoundsOffCount(dstOff, len, dst);
    int readPos = srcOff;
    int writePos = dstOff;
    for (int copied = 0; copied < len; copied++, readPos++, writePos++) {
        final int unsignedByte = src[readPos] & 0xff;
        putChar(dst, writePos, unsignedByte);
    }
}
// srcCoder == UTF16 && tgtCoder == LATIN1
static int lastIndexOfLatin1(byte[] src, int srcCount,
byte[] tgt, int tgtCount, int fromIndex) {
@ -1742,8 +1984,4 @@ final class StringUTF16 {
String.checkBoundsBeginEnd(begin, end, length(val));
}
// Checks the (offset, count) range against length(val) by delegating to
// String.checkBoundsOffCount.
static void checkBoundsOffCount(int offset, int count, byte[] val) {
    final int limit = length(val);
    String.checkBoundsOffCount(offset, count, limit);
}
}

View File

@ -157,7 +157,7 @@ public class Helper {
}
/**
 * Delegates to {@code StringLatin1.inflate} with the arguments passed
 * through unchanged.
 */
public static void inflate(byte[] src, int srcOff, byte[] dst, int dstOff, int len) {
    // Exactly one delegate call: invoking both the StringUTF16 and the
    // StringLatin1 variant would inflate into dst twice.
    StringLatin1.inflate(src, srcOff, dst, dstOff, len);
}
public static int indexOf(byte[] src, int srcCount,