/*
* Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package java.lang;
import jdk.internal.util.Preconditions;
import jdk.internal.vm.annotation.IntrinsicCandidate;
import java.util.function.BiFunction;
/**
 * Static helpers used for string encoding and decoding.
 *
 * <p>The class cannot be instantiated. Methods annotated with
 * {@link IntrinsicCandidate} carry a plain Java implementation; the public
 * entry points in front of them validate or clamp their arguments first.
 */
class StringCoding {

    private StringCoding() { }

    /**
     * Counts the leading chars of {@code s} that are non-zero ASCII.
     *
     * @param s the string to scan
     * @return the number of leading chars that are non-zero ASCII
     * @throws NullPointerException if {@code s} is null
     */
    static int countNonZeroAscii(String s) {
        byte[] value = s.value();
        return s.isLatin1()
                ? countNonZeroAsciiLatin1(value, 0, value.length)
                : countNonZeroAsciiUTF16(value, 0, s.length());
    }

    /**
     * Counts the leading bytes in the range that are strictly positive,
     * i.e. neither zero nor negative.
     *
     * @param ba the byte array to scan
     * @param off the index of the first byte to inspect
     * @param len the number of bytes to inspect
     * @return the offset, relative to {@code off}, of the first byte that is
     *         zero or negative, or {@code len} if there is none
     */
    private static int countNonZeroAsciiLatin1(byte[] ba, int off, int len) {
        final int end = off + len;
        int idx = off;
        while (idx < end) {
            if (ba[idx] <= 0) {
                return idx - off;
            }
            idx++;
        }
        return len;
    }

    /**
     * Counts the leading chars in the range that are non-zero ASCII, reading
     * each char through {@code StringUTF16.charAt}.
     *
     * @param ba the byte array to scan
     * @param off the first index handed to {@code StringUTF16.charAt}
     *            (presumably a char index rather than a byte index)
     * @param strlen the number of chars to inspect
     * @return the offset, relative to {@code off}, of the first char that is
     *         zero or greater than {@code 0x7F}, or {@code strlen} if there
     *         is none
     */
    private static int countNonZeroAsciiUTF16(byte[] ba, int off, int strlen) {
        final int end = off + strlen;
        int idx = off;
        while (idx < end) {
            final char ch = StringUTF16.charAt(ba, idx);
            if (ch == 0 || ch > 0x7F) {
                return idx - off;
            }
            idx++;
        }
        return strlen;
    }

    /**
     * Reports whether {@link #countPositives(byte[], int, int)} yields
     * anything other than {@code len} for the given range, which per its
     * specification means the range holds at least one negative byte.
     *
     * @param ba a byte array
     * @param off the index of the first byte to start reading from
     * @param len the total number of bytes to read
     * @return {@code true} if the count of leading positive bytes differs
     *         from {@code len}
     */
    static boolean hasNegatives(byte[] ba, int off, int len) {
        final int leadingPositives = countPositives(ba, off, len);
        return leadingPositives != len;
    }

    /**
     * Count the number of leading positive bytes in the range.
     *
     * @implSpec the implementation must return len if there are no negative
     * bytes in the range. If there are negative bytes, the implementation must return
     * a value that is less than or equal to the index of the first negative byte
     * in the range.
     *
     * @param ba a byte array
     * @param off the index of the first byte to start reading from
     * @param len the total number of bytes to read
     * @return the number of leading positive bytes, as described above
     * @throws NullPointerException if {@code ba} is null
     * @throws ArrayIndexOutOfBoundsException if the provided sub-range is
     *         {@linkplain Preconditions#checkFromIndexSize(int, int, int, BiFunction) out of bounds}
     */
    static int countPositives(byte[] ba, int off, int len) {
        // Reading the length doubles as the null check on `ba`.
        final int capacity = ba.length;
        Preconditions.checkFromIndexSize(off, len, capacity, Preconditions.AIOOBE_FORMATTER);
        return countPositives0(ba, off, len);
    }

    /**
     * Java implementation behind {@link #countPositives(byte[], int, int)};
     * the range is validated by that caller before this method runs.
     */
    @IntrinsicCandidate
    private static int countPositives0(byte[] ba, int off, int len) {
        final int end = off + len;
        int idx = off;
        while (idx < end) {
            if (ba[idx] < 0) {
                return idx - off;
            }
            idx++;
        }
        return len;
    }

    /**
     * Encodes as many ISO-8859-1 codepoints as possible from the source byte
     * array containing characters encoded in UTF-16, into the destination byte
     * array, assuming that the encoding is ISO-8859-1 compatible.
     *
     * <p>Invalid arguments are tolerated rather than rejected: a negative
     * {@code sp}, {@code dp} or {@code len}, or a start index at or past the
     * end of its array, yields {@code 0}. Otherwise {@code len} is clamped to
     * what both arrays can accommodate.
     *
     * @param sa the source byte array containing characters encoded in UTF-16
     * @param sp the index of the character (not byte!) from the source array to start reading from
     * @param da the target byte array
     * @param dp the index of the target array to start writing to
     * @param len the maximum number of characters (not bytes!) to be encoded
     * @return the total number of characters (not bytes!) successfully encoded
     * @throws NullPointerException if a null array is reached by the checks
     *         below; an earlier failing check returns {@code 0} before the
     *         arrays are inspected
     */
    static int encodeISOArray(byte[] sa, int sp,
                              byte[] da, int dp, int len) {
        // This method should tolerate invalid arguments, matching the lenient behavior of the VM intrinsic.
        // Hence plain comparisons are used instead of `Preconditions`, which throw on failure.
        if ((sp | dp | len) < 0) {
            return 0;
        }
        // Two bytes per character: halve the byte length. Implicit null check on `sa`.
        final int srcChars = sa.length >>> 1;
        if (sp >= srcChars) {
            return 0;
        }
        // Implicit null check on `da`.
        final int dstBytes = da.length;
        if (dp >= dstBytes) {
            return 0;
        }
        final int srcRemaining = srcChars - sp;
        final int dstRemaining = dstBytes - dp;
        final int minLen = Math.min(len, Math.min(srcRemaining, dstRemaining));
        return encodeISOArray0(sa, sp, da, dp, minLen);
    }

    /**
     * Java implementation behind {@link #encodeISOArray(byte[], int, byte[], int, int)}.
     * Copies chars until {@code len} are written or a char above
     * {@code 0xFF} is met, and returns how many were written.
     */
    @IntrinsicCandidate
    private static int encodeISOArray0(byte[] sa, int sp,
                                       byte[] da, int dp, int len) {
        int encoded = 0;
        while (encoded < len) {
            final char ch = StringUTF16.getChar(sa, sp + encoded);
            if (ch > 0xFF) {
                break;
            }
            da[dp + encoded] = (byte) ch;
            encoded++;
        }
        return encoded;
    }

    /**
     * Encodes as many ASCII codepoints as possible from the source
     * character array into the destination byte array, assuming that
     * the encoding is ASCII compatible.
     *
     * <p>Invalid arguments are tolerated rather than rejected: a negative
     * {@code sp}, {@code dp} or {@code len}, or a start index at or past the
     * end of its array, yields {@code 0}. Otherwise {@code len} is clamped to
     * what both arrays can accommodate.
     *
     * @param sa the source character array
     * @param sp the index of the source array to start reading from
     * @param da the target byte array
     * @param dp the index of the target array to start writing to
     * @param len the maximum number of characters to be encoded
     * @return the total number of characters successfully encoded
     * @throws NullPointerException if a null array is reached by the checks
     *         below; an earlier failing check returns {@code 0} before the
     *         arrays are inspected
     */
    static int encodeAsciiArray(char[] sa, int sp,
                                byte[] da, int dp, int len) {
        // This method should tolerate invalid arguments, matching the lenient behavior of the VM intrinsic.
        // Hence plain comparisons are used instead of `Preconditions`, which throw on failure.
        if ((sp | dp | len) < 0) {
            return 0;
        }
        // Implicit null check on `sa`.
        final int srcChars = sa.length;
        if (sp >= srcChars) {
            return 0;
        }
        // Implicit null check on `da`.
        final int dstBytes = da.length;
        if (dp >= dstBytes) {
            return 0;
        }
        final int srcRemaining = srcChars - sp;
        final int dstRemaining = dstBytes - dp;
        final int minLen = Math.min(len, Math.min(srcRemaining, dstRemaining));
        return encodeAsciiArray0(sa, sp, da, dp, minLen);
    }

    /**
     * Java implementation behind {@link #encodeAsciiArray(char[], int, byte[], int, int)}.
     * Copies chars until {@code len} are written or a char above
     * {@code 0x7F} is met, and returns how many were written.
     */
    @IntrinsicCandidate
    static int encodeAsciiArray0(char[] sa, int sp,
                                 byte[] da, int dp, int len) {
        int encoded = 0;
        while (encoded < len) {
            final char ch = sa[sp + encoded];
            if (ch > 0x7F) {
                break;
            }
            da[dp + encoded] = (byte) ch;
            encoded++;
        }
        return encoded;
    }
}