mirror of
https://github.com/openjdk/jdk.git
synced 2026-01-28 03:58:21 +00:00
8369564: Provide a MemorySegment API to read strings with known lengths
Co-authored-by: Per Minborg <pminborg@openjdk.org> Reviewed-by: jvernee, mcimadamore
This commit is contained in:
parent
556bddfd94
commit
d433ce5236
@ -2045,19 +2045,26 @@ public final class String
|
||||
return encode(Charset.defaultCharset(), coder(), value);
|
||||
}
|
||||
|
||||
boolean bytesCompatible(Charset charset) {
|
||||
boolean bytesCompatible(Charset charset, int srcIndex, int numChars) {
|
||||
if (isLatin1()) {
|
||||
if (charset == ISO_8859_1.INSTANCE) {
|
||||
return true; // ok, same encoding
|
||||
} else if (charset == UTF_8.INSTANCE || charset == US_ASCII.INSTANCE) {
|
||||
return !StringCoding.hasNegatives(value, 0, value.length); // ok, if ASCII-compatible
|
||||
return !StringCoding.hasNegatives(value, srcIndex, numChars); // ok, if ASCII-compatible
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void copyToSegmentRaw(MemorySegment segment, long offset) {
|
||||
MemorySegment.copy(value, 0, segment, ValueLayout.JAVA_BYTE, offset, value.length);
|
||||
void copyToSegmentRaw(MemorySegment segment, long offset, int srcIndex, int srcLength) {
|
||||
if (!isLatin1()) {
|
||||
// This method is intended to be used together with bytesCompatible, which currently only supports
|
||||
// latin1 strings. In the future, bytesCompatible could be updated to handle more cases, like
|
||||
// UTF-16 strings (when the platform and charset endianness match, and the String doesn't contain
|
||||
// unpaired surrogates). If that happens, copyToSegmentRaw should also be updated.
|
||||
throw new IllegalStateException("This string does not support copyToSegmentRaw");
|
||||
}
|
||||
MemorySegment.copy(value, srcIndex, segment, ValueLayout.JAVA_BYTE, offset, srcLength);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@ -2331,13 +2331,13 @@ public final class System {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copyToSegmentRaw(String string, MemorySegment segment, long offset) {
|
||||
string.copyToSegmentRaw(segment, offset);
|
||||
public void copyToSegmentRaw(String string, MemorySegment segment, long offset, int srcIndex, int srcLength) {
|
||||
string.copyToSegmentRaw(segment, offset, srcIndex, srcLength);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean bytesCompatible(String string, Charset charset) {
|
||||
return string.bytesCompatible(charset);
|
||||
public boolean bytesCompatible(String string, Charset charset, int srcIndex, int numChars) {
|
||||
return string.bytesCompatible(charset, srcIndex, numChars);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@ -1296,12 +1296,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
|
||||
* over the decoding process is required.
|
||||
* <p>
|
||||
* Getting a string from a segment with a known byte offset and
|
||||
* known byte length can be done like so:
|
||||
* {@snippet lang=java :
|
||||
* byte[] bytes = new byte[length];
|
||||
* MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, length);
|
||||
* return new String(bytes, charset);
|
||||
* }
|
||||
* known byte length can be done using {@link #getString(long, Charset, long)}.
|
||||
*
|
||||
* @param offset offset in bytes (relative to this segment address) at which this
|
||||
* access operation will occur
|
||||
@ -1328,6 +1323,40 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
|
||||
*/
|
||||
String getString(long offset, Charset charset);
|
||||
|
||||
/**
|
||||
* Reads a string from this segment at the given offset, using the provided length
|
||||
* and charset.
|
||||
* <p>
|
||||
* This method always replaces malformed-input and unmappable-character
|
||||
* sequences with this charset's default replacement string. The {@link
|
||||
* java.nio.charset.CharsetDecoder} class should be used when more control
|
||||
* over the decoding process is required.
|
||||
* <p>
|
||||
* If the string contains any {@code '\0'} characters, they will be read as well.
|
||||
* This differs from {@link #getString(long, Charset)}, which will only read up
|
||||
* to the first {@code '\0'}, resulting in truncation for string data that contains
|
||||
* the {@code '\0'} character.
|
||||
*
|
||||
* @param offset offset in bytes (relative to this segment address) at which this
|
||||
* access operation will occur
|
||||
* @param charset the charset used to {@linkplain Charset#newDecoder() decode} the
|
||||
* string bytes
|
||||
* @param byteLength length, in bytes, of the region of memory to read and decode into
|
||||
* a string
|
||||
* @return a Java string constructed from the bytes read from the given starting
|
||||
* address up to the given length
|
||||
* @throws IllegalArgumentException if the size of the string is greater than the
|
||||
* largest string supported by the platform
|
||||
* @throws IndexOutOfBoundsException if {@code offset < 0}
|
||||
* @throws IndexOutOfBoundsException if {@code offset > byteSize() - byteLength}
|
||||
* @throws IllegalStateException if the {@linkplain #scope() scope} associated with
|
||||
* this segment is not {@linkplain Scope#isAlive() alive}
|
||||
* @throws WrongThreadException if this method is called from a thread {@code T},
|
||||
* such that {@code isAccessibleBy(T) == false}
|
||||
* @throws IllegalArgumentException if {@code byteLength < 0}
|
||||
*/
|
||||
String getString(long offset, Charset charset, long byteLength);
|
||||
|
||||
/**
|
||||
* Writes the given string into this segment at the given offset, converting it to
|
||||
* a null-terminated byte sequence using the {@linkplain StandardCharsets#UTF_8 UTF-8}
|
||||
@ -1366,7 +1395,8 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
|
||||
* If the given string contains any {@code '\0'} characters, they will be
|
||||
* copied as well. This means that, depending on the method used to read
|
||||
* the string, such as {@link MemorySegment#getString(long)}, the string
|
||||
* will appear truncated when read again.
|
||||
* will appear truncated when read again. The string can be read without
|
||||
* truncation using {@link #getString(long, Charset, long)}.
|
||||
*
|
||||
* @param offset offset in bytes (relative to this segment address) at which this
|
||||
* access operation will occur, the final address of this write
|
||||
@ -2606,6 +2636,50 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
|
||||
elementCount);
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies the byte sequence of the given string encoded using the provided charset
|
||||
* to the destination segment.
|
||||
* <p>
|
||||
* This method always replaces malformed-input and unmappable-character
|
||||
* sequences with this charset's default replacement string. The {@link
|
||||
* java.nio.charset.CharsetDecoder} class should be used when more control
|
||||
* over the decoding process is required.
|
||||
* <p>
|
||||
* If the given string contains any {@code '\0'} characters, they will be
|
||||
* copied as well. This means that, depending on the method used to read
|
||||
* the string, such as {@link MemorySegment#getString(long)}, the string
|
||||
* will appear truncated when read again. The string can be read without
|
||||
* truncation using {@link #getString(long, Charset, long)}.
|
||||
*
|
||||
* @param src the Java string to be written into the destination segment
|
||||
* @param dstEncoding the charset used to {@linkplain Charset#newEncoder() encode}
|
||||
* the string bytes.
|
||||
* @param srcIndex the starting character index of the source string
|
||||
* @param dst the destination segment
|
||||
* @param dstOffset the starting offset, in bytes, of the destination segment
|
||||
* @param numChars the number of characters to be copied
|
||||
* @throws IllegalStateException if the {@linkplain #scope() scope} associated with
|
||||
* {@code dst} is not {@linkplain Scope#isAlive() alive}
|
||||
* @throws WrongThreadException if this method is called from a thread {@code T},
|
||||
* such that {@code dst.isAccessibleBy(T) == false}
|
||||
* @throws IndexOutOfBoundsException if either {@code srcIndex}, {@code numChars}, or {@code dstOffset}
|
||||
* are {@code < 0}
|
||||
* @throws IndexOutOfBoundsException if {@code srcIndex > src.length() - numChars}
|
||||
* @throws IllegalArgumentException if {@code dst} is {@linkplain #isReadOnly() read-only}
|
||||
* @throws IndexOutOfBoundsException if {@code dstOffset > dstSegment.byteSize() - B} where {@code B} is the size,
|
||||
* in bytes, of the substring of {@code src} encoded using the given charset
|
||||
* @return the number of copied bytes.
|
||||
*/
|
||||
@ForceInline
|
||||
static long copy(String src, Charset dstEncoding, int srcIndex, MemorySegment dst, long dstOffset, int numChars) {
|
||||
Objects.requireNonNull(src);
|
||||
Objects.requireNonNull(dstEncoding);
|
||||
Objects.requireNonNull(dst);
|
||||
Objects.checkFromIndexSize(srcIndex, numChars, src.length());
|
||||
|
||||
return AbstractMemorySegmentImpl.copy(src, dstEncoding, srcIndex, dst, dstOffset, numChars);
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds and returns the relative offset, in bytes, of the first mismatch between the
|
||||
* source and the destination segments. More specifically, the bytes at offset
|
||||
|
||||
@ -111,7 +111,8 @@ public interface SegmentAllocator {
|
||||
* If the given string contains any {@code '\0'} characters, they will be
|
||||
* copied as well. This means that, depending on the method used to read
|
||||
* the string, such as {@link MemorySegment#getString(long)}, the string
|
||||
* will appear truncated when read again.
|
||||
* will appear truncated when read again. The string can be read without
|
||||
* truncation using {@link MemorySegment#getString(long, Charset, long)}.
|
||||
*
|
||||
* @param str the Java string to be converted into a C string
|
||||
* @param charset the charset used to {@linkplain Charset#newEncoder() encode} the
|
||||
@ -137,10 +138,10 @@ public interface SegmentAllocator {
|
||||
int termCharSize = StringSupport.CharsetKind.of(charset).terminatorCharSize();
|
||||
MemorySegment segment;
|
||||
int length;
|
||||
if (StringSupport.bytesCompatible(str, charset)) {
|
||||
if (StringSupport.bytesCompatible(str, charset, 0, str.length())) {
|
||||
length = str.length();
|
||||
segment = allocateNoInit((long) length + termCharSize);
|
||||
StringSupport.copyToSegmentRaw(str, segment, 0);
|
||||
StringSupport.copyToSegmentRaw(str, segment, 0, 0, str.length());
|
||||
} else {
|
||||
byte[] bytes = str.getBytes(charset);
|
||||
length = bytes.length;
|
||||
@ -153,6 +154,53 @@ public interface SegmentAllocator {
|
||||
return segment;
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes a Java string using the provided charset and stores the resulting
|
||||
* byte array into a memory segment.
|
||||
* <p>
|
||||
* This method always replaces malformed-input and unmappable-character
|
||||
* sequences with this charset's default replacement byte array. The
|
||||
* {@link java.nio.charset.CharsetEncoder} class should be used when more
|
||||
* control over the encoding process is required.
|
||||
* <p>
|
||||
* If the given string contains any {@code '\0'} characters, they will be
|
||||
* copied as well. This means that, depending on the method used to read
|
||||
* the string, such as {@link MemorySegment#getString(long)}, the string
|
||||
* will appear truncated when read again. The string can be read without
|
||||
* truncation using {@link MemorySegment#getString(long, Charset, long)}.
|
||||
*
|
||||
* @param str the Java string to be encoded
|
||||
* @param charset the charset used to {@linkplain Charset#newEncoder() encode} the
|
||||
* string bytes
|
||||
* @param srcIndex the starting index of the source string
|
||||
* @param numChars the number of characters to be copied
|
||||
* @return a new native segment containing the encoded string
|
||||
* @throws IndexOutOfBoundsException if either {@code srcIndex} or {@code numChars} are {@code < 0}
|
||||
* @throws IndexOutOfBoundsException if {@code srcIndex > str.length() - numChars}
|
||||
*
|
||||
* @implSpec The default implementation for this method copies the contents of the
|
||||
* provided Java string into a new memory segment obtained by calling
|
||||
* {@code this.allocate(B)}, where {@code B} is the size, in bytes, of
|
||||
* the string encoded using the provided charset
|
||||
* (e.g. {@code str.getBytes(charset).length});
|
||||
*/
|
||||
@ForceInline
|
||||
default MemorySegment allocateFrom(String str, Charset charset, int srcIndex, int numChars) {
|
||||
Objects.requireNonNull(charset);
|
||||
Objects.requireNonNull(str);
|
||||
Objects.checkFromIndexSize(srcIndex, numChars, str.length());
|
||||
MemorySegment segment;
|
||||
if (StringSupport.bytesCompatible(str, charset, srcIndex, numChars)) {
|
||||
segment = allocateNoInit(numChars);
|
||||
StringSupport.copyToSegmentRaw(str, segment, 0, srcIndex, numChars);
|
||||
} else {
|
||||
byte[] bytes = str.substring(srcIndex, srcIndex + numChars).getBytes(charset);
|
||||
segment = allocateNoInit(bytes.length);
|
||||
MemorySegment.copy(bytes, 0, segment, ValueLayout.JAVA_BYTE, 0, bytes.length);
|
||||
}
|
||||
return segment;
|
||||
}
|
||||
|
||||
/**
|
||||
* {@return a new memory segment initialized with the provided byte value}
|
||||
* <p>
|
||||
|
||||
@ -634,10 +634,10 @@ public interface JavaLangAccess {
|
||||
/**
|
||||
* Copy the string bytes to an existing segment, avoiding intermediate copies.
|
||||
*/
|
||||
void copyToSegmentRaw(String string, MemorySegment segment, long offset);
|
||||
void copyToSegmentRaw(String string, MemorySegment segment, long offset, int srcIndex, int srcLength);
|
||||
|
||||
/**
|
||||
* Are the string bytes compatible with the given charset?
|
||||
*/
|
||||
boolean bytesCompatible(String string, Charset charset);
|
||||
boolean bytesCompatible(String string, Charset charset, int srcIndex, int numChars);
|
||||
}
|
||||
|
||||
@ -551,6 +551,13 @@ public abstract sealed class AbstractMemorySegmentImpl
|
||||
unsafeGetOffset() == that.unsafeGetOffset();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getString(long offset, Charset charset, long byteLength) {
|
||||
Utils.checkNonNegativeArgument(byteLength, "byteLength");
|
||||
Objects.requireNonNull(charset);
|
||||
return StringSupport.read(this, offset, charset, byteLength);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(
|
||||
@ -702,6 +709,16 @@ public abstract sealed class AbstractMemorySegmentImpl
|
||||
}
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
public static long copy(String src, Charset dstEncoding, int srcIndex, MemorySegment dst, long dstOffset, int numChars) {
|
||||
Objects.requireNonNull(src);
|
||||
Objects.requireNonNull(dstEncoding);
|
||||
Objects.requireNonNull(dst);
|
||||
|
||||
AbstractMemorySegmentImpl destImpl = (AbstractMemorySegmentImpl)dst;
|
||||
return StringSupport.copyBytes(src, destImpl, dstEncoding, dstOffset, srcIndex, numChars);
|
||||
}
|
||||
|
||||
// accessors
|
||||
|
||||
@ForceInline
|
||||
|
||||
@ -30,11 +30,14 @@ import jdk.internal.access.SharedSecrets;
|
||||
import jdk.internal.misc.ScopedMemoryAccess;
|
||||
import jdk.internal.util.Architecture;
|
||||
import jdk.internal.util.ArraysSupport;
|
||||
import jdk.internal.util.Preconditions;
|
||||
import jdk.internal.vm.annotation.ForceInline;
|
||||
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.lang.reflect.Array;
|
||||
import java.nio.charset.CharacterCodingException;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.Objects;
|
||||
|
||||
import static java.lang.foreign.ValueLayout.*;
|
||||
|
||||
@ -58,6 +61,27 @@ public final class StringSupport {
|
||||
};
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
public static String read(AbstractMemorySegmentImpl segment, long offset, Charset charset, long length) {
|
||||
return readBytes(segment, offset, charset, length);
|
||||
}
|
||||
|
||||
/**
 * Copies {@code length} bytes out of {@code segment}, starting at {@code offset},
 * and decodes them into a string using {@code charset}.
 *
 * @param segment the segment to read from
 * @param offset  offset, in bytes, at which reading starts
 * @param charset the charset used to decode the bytes
 * @param length  the number of bytes to read
 * @return the decoded string
 * @throws IllegalArgumentException  if {@code length > Integer.MAX_VALUE}
 * @throws IndexOutOfBoundsException if {@code offset}/{@code length} do not denote
 *                                   a range inside {@code segment}
 */
@ForceInline
public static String readBytes(AbstractMemorySegmentImpl segment, long offset, Charset charset, long length) {
    if (length > Integer.MAX_VALUE) {
        throw new IllegalArgumentException("Required length exceeds implementation limit");
    }
    // Validate the requested range BEFORE allocating the temporary array. Otherwise a
    // huge, out-of-range length would try to allocate up to 2GB (or fail with an
    // OutOfMemoryError) instead of reporting the IndexOutOfBoundsException.
    Objects.checkFromIndexSize(offset, length, segment.byteSize());
    final int lengthBytes = (int) length;
    final byte[] bytes = new byte[lengthBytes];
    MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, lengthBytes);
    try {
        return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset);
    } catch (CharacterCodingException _) {
        // use replacement characters for malformed input
        return new String(bytes, charset);
    }
}
|
||||
|
||||
@ForceInline
|
||||
public static void write(AbstractMemorySegmentImpl segment, long offset, Charset charset, String string) {
|
||||
switch (CharsetKind.of(charset)) {
|
||||
@ -70,14 +94,7 @@ public final class StringSupport {
|
||||
@ForceInline
|
||||
private static String readByte(AbstractMemorySegmentImpl segment, long offset, Charset charset) {
|
||||
final int len = strlenByte(segment, offset, segment.byteSize());
|
||||
final byte[] bytes = new byte[len];
|
||||
MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len);
|
||||
try {
|
||||
return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset);
|
||||
} catch (CharacterCodingException _) {
|
||||
// use replacement characters for malformed input
|
||||
return new String(bytes, charset);
|
||||
}
|
||||
return readBytes(segment, offset, charset, len);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -89,14 +106,7 @@ public final class StringSupport {
|
||||
@ForceInline
|
||||
private static String readShort(AbstractMemorySegmentImpl segment, long offset, Charset charset) {
|
||||
int len = strlenShort(segment, offset, segment.byteSize());
|
||||
byte[] bytes = new byte[len];
|
||||
MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len);
|
||||
try {
|
||||
return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset);
|
||||
} catch (CharacterCodingException _) {
|
||||
// use replacement characters for malformed input
|
||||
return new String(bytes, charset);
|
||||
}
|
||||
return readBytes(segment, offset, charset, len);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -108,14 +118,7 @@ public final class StringSupport {
|
||||
@ForceInline
|
||||
private static String readInt(AbstractMemorySegmentImpl segment, long offset, Charset charset) {
|
||||
int len = strlenInt(segment, offset, segment.byteSize());
|
||||
byte[] bytes = new byte[len];
|
||||
MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len);
|
||||
try {
|
||||
return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset);
|
||||
} catch (CharacterCodingException _) {
|
||||
// use replacement characters for malformed input
|
||||
return new String(bytes, charset);
|
||||
}
|
||||
return readBytes(segment, offset, charset, len);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -345,22 +348,26 @@ public final class StringSupport {
|
||||
}
|
||||
}
|
||||
|
||||
public static boolean bytesCompatible(String string, Charset charset) {
|
||||
return JAVA_LANG_ACCESS.bytesCompatible(string, charset);
|
||||
public static boolean bytesCompatible(String string, Charset charset, int srcIndex, int numChars) {
|
||||
return JAVA_LANG_ACCESS.bytesCompatible(string, charset, srcIndex, numChars);
|
||||
}
|
||||
|
||||
public static int copyBytes(String string, MemorySegment segment, Charset charset, long offset) {
|
||||
if (bytesCompatible(string, charset)) {
|
||||
copyToSegmentRaw(string, segment, offset);
|
||||
return string.length();
|
||||
return copyBytes(string, segment, charset, offset, 0, string.length());
|
||||
}
|
||||
|
||||
public static int copyBytes(String string, MemorySegment segment, Charset charset, long offset, int srcIndex, int numChars) {
|
||||
if (bytesCompatible(string, charset, srcIndex, numChars)) {
|
||||
copyToSegmentRaw(string, segment, offset, srcIndex, numChars);
|
||||
return numChars;
|
||||
} else {
|
||||
byte[] bytes = string.getBytes(charset);
|
||||
byte[] bytes = string.substring(srcIndex, srcIndex + numChars).getBytes(charset);
|
||||
MemorySegment.copy(bytes, 0, segment, JAVA_BYTE, offset, bytes.length);
|
||||
return bytes.length;
|
||||
}
|
||||
}
|
||||
|
||||
public static void copyToSegmentRaw(String string, MemorySegment segment, long offset) {
|
||||
JAVA_LANG_ACCESS.copyToSegmentRaw(string, segment, offset);
|
||||
public static void copyToSegmentRaw(String string, MemorySegment segment, long offset, int srcIndex, int srcLength) {
|
||||
JAVA_LANG_ACCESS.copyToSegmentRaw(string, segment, offset, srcIndex, srcLength);
|
||||
}
|
||||
}
|
||||
|
||||
@ -37,6 +37,7 @@ import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import java.util.function.UnaryOperator;
|
||||
|
||||
import jdk.internal.foreign.AbstractMemorySegmentImpl;
|
||||
@ -102,6 +103,140 @@ public class TestStringEncoding {
|
||||
}
|
||||
}
|
||||
|
||||
@Test(dataProvider = "strings")
|
||||
public void testStringsLength(String testString) {
|
||||
if (!testString.isEmpty()) {
|
||||
for (Charset charset : Charset.availableCharsets().values()) {
|
||||
if (charset.canEncode()) {
|
||||
for (Arena arena : arenas()) {
|
||||
try (arena) {
|
||||
MemorySegment text = arena.allocateFrom(testString, charset, 0, testString.length());
|
||||
long length = text.byteSize();
|
||||
assertEquals(length, testString.getBytes(charset).length);
|
||||
String roundTrip = text.getString(0, charset, length);
|
||||
if (charset.newEncoder().canEncode(testString)) {
|
||||
assertEquals(roundTrip, testString);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test(dataProvider = "strings")
|
||||
public void testStringsCopy(String testString) {
|
||||
if (!testString.isEmpty()) {
|
||||
for (Charset charset : Charset.availableCharsets().values()) {
|
||||
if (charset.canEncode()) {
|
||||
for (Arena arena : arenas()) {
|
||||
try (arena) {
|
||||
byte[] bytes = testString.getBytes(charset);
|
||||
MemorySegment text = arena.allocate(JAVA_BYTE, bytes.length);
|
||||
MemorySegment.copy(testString, charset, 0, text, 0, testString.length());
|
||||
String roundTrip = text.getString(0, charset, bytes.length);
|
||||
if (charset.newEncoder().canEncode(testString)) {
|
||||
assertEquals(roundTrip, testString);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testStringsLengthNegative() {
|
||||
try (Arena arena = Arena.ofConfined()) {
|
||||
var segment = arena.allocateFrom("abc");
|
||||
assertThrows(IllegalArgumentException.class, () -> segment.getString(1, StandardCharsets.UTF_8, -1));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCopyThrows() {
|
||||
try (Arena arena = Arena.ofConfined()) {
|
||||
String testString = "abc";
|
||||
String testString_notBytesCompatible = "snowman \u26C4";
|
||||
MemorySegment text = arena.allocate(JAVA_BYTE, 3);
|
||||
MemorySegment text_notBytesCompatible = arena.allocate(JAVA_BYTE,
|
||||
testString_notBytesCompatible.getBytes(StandardCharsets.UTF_8).length);
|
||||
MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, 0, testString.length());
|
||||
MemorySegment.copy(testString_notBytesCompatible, StandardCharsets.UTF_8, 0,
|
||||
text_notBytesCompatible, 0,
|
||||
testString_notBytesCompatible.length());
|
||||
// srcIndex < 0
|
||||
assertThrows(IndexOutOfBoundsException.class, () ->
|
||||
MemorySegment.copy(testString, StandardCharsets.UTF_8, -1, text, 0, testString.length()));
|
||||
// dstOffset < 0
|
||||
assertThrows(IndexOutOfBoundsException.class, () ->
|
||||
MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, -1, testString.length()));
|
||||
// numChars < 0
|
||||
assertThrows(IndexOutOfBoundsException.class, () ->
|
||||
MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, 0, -1));
|
||||
// srcIndex + numChars > length
|
||||
assertThrows(IndexOutOfBoundsException.class, () ->
|
||||
MemorySegment.copy(testString, StandardCharsets.UTF_8, 1, text, 0, testString.length()));
|
||||
assertThrows(IndexOutOfBoundsException.class, () ->
|
||||
MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, 0, testString.length() + 1));
|
||||
// dstOffset > byteSize() - B
|
||||
assertThrows(IndexOutOfBoundsException.class, () ->
|
||||
MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, 1, testString.length()));
|
||||
// srcIndex + numChars overflows
|
||||
assertThrows(IndexOutOfBoundsException.class, () ->
|
||||
MemorySegment.copy(testString, StandardCharsets.UTF_8, Integer.MAX_VALUE, text, 0, Integer.MAX_VALUE + 3));
|
||||
assertThrows(IndexOutOfBoundsException.class, () ->
|
||||
MemorySegment.copy(testString_notBytesCompatible, StandardCharsets.UTF_8, Integer.MAX_VALUE, text, 0, Integer.MAX_VALUE + 3));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAllocateFromThrows() {
|
||||
try (Arena arena = Arena.ofConfined()) {
|
||||
String testString = "abc";
|
||||
String testString_notBytesCompatible = "snowman \u26C4";
|
||||
arena.allocateFrom(testString, StandardCharsets.UTF_8, 0, testString.length());
|
||||
arena.allocateFrom(testString, StandardCharsets.UTF_8, 2, 1);
|
||||
// srcIndex < 0
|
||||
assertThrows(IndexOutOfBoundsException.class, () ->
|
||||
arena.allocateFrom(testString, StandardCharsets.UTF_8, -1, testString.length()));
|
||||
// numChars < 0
|
||||
assertThrows(IndexOutOfBoundsException.class, () ->
|
||||
arena.allocateFrom(testString, StandardCharsets.UTF_8, 0, -1));
|
||||
// srcIndex + numChars > length
|
||||
assertThrows(IndexOutOfBoundsException.class, () ->
|
||||
arena.allocateFrom(testString, StandardCharsets.UTF_8, 0, testString.length() + 1));
|
||||
assertThrows(IndexOutOfBoundsException.class, () ->
|
||||
arena.allocateFrom(testString, StandardCharsets.UTF_8, 1, testString.length()));
|
||||
// srcIndex + numChars overflows
|
||||
assertThrows(IndexOutOfBoundsException.class, () ->
|
||||
arena.allocateFrom(testString, StandardCharsets.UTF_8, 3, Integer.MAX_VALUE));
|
||||
assertThrows(IndexOutOfBoundsException.class, () -> arena.allocateFrom(
|
||||
testString_notBytesCompatible, StandardCharsets.UTF_8, 3, Integer.MAX_VALUE));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetStringThrows() {
|
||||
try (Arena arena = Arena.ofConfined()) {
|
||||
String testString = "abc";
|
||||
MemorySegment text = arena.allocateFrom(testString, StandardCharsets.UTF_8, 0, testString.length());
|
||||
text.getString(0, StandardCharsets.UTF_8, 3);
|
||||
// unsupported string size
|
||||
assertThrows(IllegalArgumentException.class, () ->
|
||||
text.getString(0, StandardCharsets.UTF_8, Integer.MAX_VALUE + 1L));
|
||||
// offset < 0
|
||||
assertThrows(IndexOutOfBoundsException.class, () ->
|
||||
text.getString(-1, StandardCharsets.UTF_8, 3));
|
||||
// offset > byteSize() - length
|
||||
assertThrows(IndexOutOfBoundsException.class, () ->
|
||||
text.getString(1, StandardCharsets.UTF_8, 3));
|
||||
// length < 0
|
||||
assertThrows(IllegalArgumentException.class, () ->
|
||||
text.getString(0, StandardCharsets.UTF_8, -1));
|
||||
}
|
||||
}
|
||||
|
||||
@Test(dataProvider = "strings")
|
||||
public void testStringsHeap(String testString) {
|
||||
for (Charset charset : singleByteCharsets()) {
|
||||
@ -221,6 +356,74 @@ public class TestStringEncoding {
|
||||
}
|
||||
}
|
||||
|
||||
@Test(dataProvider = "strings")
|
||||
public void testSubstringGetString(String testString) {
|
||||
if (testString.length() < 3 || !containsOnlyRegularCharacters(testString)) {
|
||||
return;
|
||||
}
|
||||
for (var charset : singleByteCharsets()) {
|
||||
for (var arena: arenas()) {
|
||||
try (arena) {
|
||||
MemorySegment text = arena.allocateFrom(testString, charset, 0, testString.length());
|
||||
for (int srcIndex = 0; srcIndex <= testString.length(); srcIndex++) {
|
||||
for (int numChars = 0; numChars <= testString.length() - srcIndex; numChars++) {
|
||||
// this test assumes single-byte charsets
|
||||
String roundTrip = text.getString(srcIndex, charset, numChars);
|
||||
String substring = testString.substring(srcIndex, srcIndex + numChars);
|
||||
assertEquals(roundTrip, substring);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test(dataProvider = "strings")
|
||||
public void testSubstringAllocate(String testString) {
|
||||
if (testString.length() < 3 || !containsOnlyRegularCharacters(testString)) {
|
||||
return;
|
||||
}
|
||||
for (var charset : singleByteCharsets()) {
|
||||
for (var arena: arenas()) {
|
||||
try (arena) {
|
||||
for (int srcIndex = 0; srcIndex <= testString.length(); srcIndex++) {
|
||||
for (int numChars = 0; numChars <= testString.length() - srcIndex; numChars++) {
|
||||
MemorySegment text = arena.allocateFrom(testString, charset, srcIndex, numChars);
|
||||
String substring = testString.substring(srcIndex, srcIndex + numChars);
|
||||
assertEquals(text.byteSize(), substring.getBytes(charset).length);
|
||||
String roundTrip = text.getString(0, charset, text.byteSize());
|
||||
assertEquals(roundTrip, substring);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test(dataProvider = "strings")
|
||||
public void testSubstringCopy(String testString) {
|
||||
if (testString.length() < 3 || !containsOnlyRegularCharacters(testString)) {
|
||||
return;
|
||||
}
|
||||
for (var charset : singleByteCharsets()) {
|
||||
for (var arena: arenas()) {
|
||||
try (arena) {
|
||||
for (int srcIndex = 0; srcIndex <= testString.length(); srcIndex++) {
|
||||
for (int numChars = 0; numChars <= testString.length() - srcIndex; numChars++) {
|
||||
String substring = testString.substring(srcIndex, srcIndex + numChars);
|
||||
long length = substring.getBytes(charset).length;
|
||||
MemorySegment text = arena.allocate(JAVA_BYTE, length);
|
||||
long copied = MemorySegment.copy(testString, charset, srcIndex, text, 0, numChars);
|
||||
String roundTrip = text.getString(0, charset, length);
|
||||
assertEquals(roundTrip, substring);
|
||||
assertEquals(copied, length);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static final MemoryLayout CHAR_POINTER = ADDRESS
|
||||
.withTargetLayout(MemoryLayout.sequenceLayout(Long.MAX_VALUE, JAVA_BYTE));
|
||||
private static final Linker LINKER = Linker.nativeLinker();
|
||||
@ -402,7 +605,7 @@ public class TestStringEncoding {
|
||||
{""},
|
||||
{"X"},
|
||||
{"12345"},
|
||||
{"yen \u00A5"},
|
||||
{"section \u00A7"},
|
||||
{"snowman \u26C4"},
|
||||
{"rainbow \uD83C\uDF08"},
|
||||
{"0"},
|
||||
|
||||
@ -0,0 +1,94 @@
|
||||
/*
|
||||
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
package org.openjdk.bench.java.lang.foreign;
|
||||
|
||||
import static java.lang.foreign.ValueLayout.JAVA_BYTE;
|
||||
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||
|
||||
import org.openjdk.jmh.annotations.Benchmark;
|
||||
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||
import org.openjdk.jmh.annotations.Fork;
|
||||
import org.openjdk.jmh.annotations.Measurement;
|
||||
import org.openjdk.jmh.annotations.Mode;
|
||||
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||
import org.openjdk.jmh.annotations.Param;
|
||||
import org.openjdk.jmh.annotations.Scope;
|
||||
import org.openjdk.jmh.annotations.Setup;
|
||||
import org.openjdk.jmh.annotations.State;
|
||||
import org.openjdk.jmh.annotations.Warmup;
|
||||
|
||||
import java.lang.foreign.Arena;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
@BenchmarkMode(Mode.AverageTime)
|
||||
@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
|
||||
@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
|
||||
@State(Scope.Benchmark)
|
||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||
@Fork(value = 3)
|
||||
public class FromJavaStringTest {
|
||||
|
||||
private String str;
|
||||
private MemorySegment strSegment;
|
||||
private int lengthBytes;
|
||||
|
||||
@Param({"5", "20", "100", "200", "451"})
|
||||
int size;
|
||||
|
||||
@Setup
|
||||
public void setup() {
|
||||
var arena = Arena.ofAuto();
|
||||
while (LOREM.length() < size) {
|
||||
LOREM += LOREM;
|
||||
}
|
||||
str = LOREM.substring(0, size);
|
||||
strSegment = arena.allocateFrom(str);
|
||||
lengthBytes = str.getBytes(UTF_8).length;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void segment_setString() {
|
||||
strSegment.setString(0, str, UTF_8);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void segment_copyStringRaw() {
|
||||
MemorySegment.copy(str, UTF_8, 0, strSegment, 0, str.length());
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void segment_copyStringBytes() {
|
||||
byte[] bytes = str.getBytes(UTF_8);
|
||||
MemorySegment.copy(bytes, 0, strSegment, JAVA_BYTE, 0, bytes.length);
|
||||
}
|
||||
|
||||
static String LOREM =
|
||||
"""
|
||||
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et
|
||||
dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip
|
||||
ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
|
||||
fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt
|
||||
mollit anim id est laborum.
|
||||
""";
|
||||
}
|
||||
@ -22,6 +22,9 @@
|
||||
*/
|
||||
package org.openjdk.bench.java.lang.foreign;
|
||||
|
||||
import static java.lang.foreign.ValueLayout.JAVA_BYTE;
|
||||
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||
|
||||
import org.openjdk.jmh.annotations.Benchmark;
|
||||
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||
import org.openjdk.jmh.annotations.Fork;
|
||||
@ -47,6 +50,7 @@ import java.util.concurrent.TimeUnit;
|
||||
public class ToJavaStringTest {
|
||||
|
||||
// Segment holding the benchmark string; assigned in setup() from arena.allocateFrom(...).
private MemorySegment strSegment;
|
||||
// Length in bytes of the benchmark string when encoded as UTF-8; assigned in setup().
private int length;
|
||||
|
||||
// Number of characters taken from LOREM to build the benchmark string.
@Param({"5", "20", "100", "200", "451"})
|
||||
int size;
|
||||
@ -61,19 +65,33 @@ public class ToJavaStringTest {
|
||||
while (LOREM.length() < size) {
|
||||
LOREM += LOREM;
|
||||
}
|
||||
strSegment = arena.allocateFrom(LOREM.substring(0, size));
|
||||
var s = LOREM.substring(0, size);
|
||||
strSegment = arena.allocateFrom(s);
|
||||
length = s.getBytes(UTF_8).length;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public String panama_readString() {
|
||||
public String segment_getString() {
|
||||
return strSegment.getString(0);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public String segment_getStringLength() {
|
||||
return strSegment.getString(0, UTF_8, length);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public String jni_readString() {
|
||||
return readString(strSegment.address());
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public String segment_copyStringBytes() {
|
||||
byte[] bytes = new byte[length];
|
||||
MemorySegment.copy(strSegment, JAVA_BYTE, 0, bytes, 0, length);
|
||||
return new String(bytes, UTF_8);
|
||||
}
|
||||
|
||||
// Native method (its implementation is not in this file); jni_readString() calls it with strSegment.address().
// NOTE(review): presumably reads a NUL-terminated string at addr - confirm against the native source.
static native String readString(long addr);
|
||||
|
||||
static String LOREM = """
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user