8369564: Provide a MemorySegment API to read strings with known lengths

Co-authored-by: Per Minborg <pminborg@openjdk.org>
Reviewed-by: jvernee, mcimadamore
This commit is contained in:
Liam Miller-Cushon 2026-01-12 15:22:42 +00:00
parent 556bddfd94
commit d433ce5236
10 changed files with 523 additions and 55 deletions

View File

@ -2045,19 +2045,26 @@ public final class String
return encode(Charset.defaultCharset(), coder(), value);
}
boolean bytesCompatible(Charset charset) {
boolean bytesCompatible(Charset charset, int srcIndex, int numChars) {
if (isLatin1()) {
if (charset == ISO_8859_1.INSTANCE) {
return true; // ok, same encoding
} else if (charset == UTF_8.INSTANCE || charset == US_ASCII.INSTANCE) {
return !StringCoding.hasNegatives(value, 0, value.length); // ok, if ASCII-compatible
return !StringCoding.hasNegatives(value, srcIndex, numChars); // ok, if ASCII-compatible
}
}
return false;
}
void copyToSegmentRaw(MemorySegment segment, long offset) {
MemorySegment.copy(value, 0, segment, ValueLayout.JAVA_BYTE, offset, value.length);
/**
 * Copies {@code srcLength} bytes of this string's {@code value} array, starting at
 * index {@code srcIndex}, into {@code segment} at byte offset {@code offset}. The
 * bytes are copied as-is; no charset conversion takes place.
 * <p>
 * This method is intended to be used together with {@code bytesCompatible}, which
 * currently only supports latin1 strings.
 *
 * @throws IllegalStateException if this string is not latin1
 */
void copyToSegmentRaw(MemorySegment segment, long offset, int srcIndex, int srcLength) {
    if (isLatin1()) {
        MemorySegment.copy(value, srcIndex, segment, ValueLayout.JAVA_BYTE, offset, srcLength);
        return;
    }
    // In the future, bytesCompatible could be updated to handle more cases, like
    // UTF-16 strings (when the platform and charset endianness match, and the String
    // doesn't contain unpaired surrogates). If that happens, this method should be
    // updated as well; until then a non-latin1 string is rejected.
    throw new IllegalStateException("This string does not support copyToSegmentRaw");
}
/**

View File

@ -2331,13 +2331,13 @@ public final class System {
}
@Override
public void copyToSegmentRaw(String string, MemorySegment segment, long offset) {
string.copyToSegmentRaw(segment, offset);
public void copyToSegmentRaw(String string, MemorySegment segment, long offset, int srcIndex, int srcLength) {
string.copyToSegmentRaw(segment, offset, srcIndex, srcLength);
}
@Override
public boolean bytesCompatible(String string, Charset charset) {
return string.bytesCompatible(charset);
public boolean bytesCompatible(String string, Charset charset, int srcIndex, int numChars) {
return string.bytesCompatible(charset, srcIndex, numChars);
}
});
}

View File

@ -1296,12 +1296,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
* over the decoding process is required.
* <p>
* Getting a string from a segment with a known byte offset and
* known byte length can be done like so:
* {@snippet lang=java :
* byte[] bytes = new byte[length];
* MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, length);
* return new String(bytes, charset);
* }
* known byte length can be done using {@link #getString(long, Charset, long)}.
*
* @param offset offset in bytes (relative to this segment address) at which this
* access operation will occur
@ -1328,6 +1323,40 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
*/
String getString(long offset, Charset charset);
/**
* Reads a string from this segment at the given offset, using the provided length
* and charset.
* <p>
* This method always replaces malformed-input and unmappable-character
* sequences with this charset's default replacement string. The {@link
* java.nio.charset.CharsetDecoder} class should be used when more control
* over the decoding process is required.
* <p>
* If the string contains any {@code '\0'} characters, they will be read as well.
* This differs from {@link #getString(long, Charset)}, which will only read up
* to the first {@code '\0'}, resulting in truncation for string data that contains
* the {@code '\0'} character.
*
* @param offset offset in bytes (relative to this segment address) at which this
* access operation will occur
* @param charset the charset used to {@linkplain Charset#newDecoder() decode} the
* string bytes
* @param byteLength length, in bytes, of the region of memory to read and decode into
* a string
* @return a Java string constructed from the bytes read from the given starting
* offset up to the given length
* @throws IllegalArgumentException if {@code byteLength < 0}
* @throws IllegalArgumentException if the size of the string is greater than the
* largest string supported by the platform
* @throws IndexOutOfBoundsException if {@code offset < 0}
* @throws IndexOutOfBoundsException if {@code offset > byteSize() - byteLength}
* @throws IllegalStateException if the {@linkplain #scope() scope} associated with
* this segment is not {@linkplain Scope#isAlive() alive}
* @throws WrongThreadException if this method is called from a thread {@code T},
* such that {@code isAccessibleBy(T) == false}
*/
String getString(long offset, Charset charset, long byteLength);
/**
* Writes the given string into this segment at the given offset, converting it to
* a null-terminated byte sequence using the {@linkplain StandardCharsets#UTF_8 UTF-8}
@ -1366,7 +1395,8 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
* If the given string contains any {@code '\0'} characters, they will be
* copied as well. This means that, depending on the method used to read
* the string, such as {@link MemorySegment#getString(long)}, the string
* will appear truncated when read again.
* will appear truncated when read again. The string can be read without
* truncation using {@link #getString(long, Charset, long)}.
*
* @param offset offset in bytes (relative to this segment address) at which this
* access operation will occur, the final address of this write
@ -2606,6 +2636,50 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
elementCount);
}
/**
* Copies the byte sequence of the given string encoded using the provided charset
* to the destination segment.
* <p>
* This method always replaces malformed-input and unmappable-character
* sequences with this charset's default replacement byte array. The {@link
* java.nio.charset.CharsetEncoder} class should be used when more control
* over the encoding process is required.
* <p>
* If the given string contains any {@code '\0'} characters, they will be
* copied as well. This means that, depending on the method used to read
* the string, such as {@link MemorySegment#getString(long)}, the string
* will appear truncated when read again. The string can be read without
* truncation using {@link #getString(long, Charset, long)}.
*
* @param src the Java string to be written into the destination segment
* @param dstEncoding the charset used to {@linkplain Charset#newEncoder() encode}
* the string bytes.
* @param srcIndex the starting character index of the source string
* @param dst the destination segment
* @param dstOffset the starting offset, in bytes, of the destination segment
* @param numChars the number of characters to be copied
* @throws IllegalStateException if the {@linkplain #scope() scope} associated with
* {@code dst} is not {@linkplain Scope#isAlive() alive}
* @throws WrongThreadException if this method is called from a thread {@code T},
* such that {@code dst.isAccessibleBy(T) == false}
* @throws IndexOutOfBoundsException if either {@code srcIndex}, {@code numChars}, or {@code dstOffset}
* are {@code < 0}
* @throws IndexOutOfBoundsException if {@code srcIndex > src.length() - numChars}
* @throws IllegalArgumentException if {@code dst} is {@linkplain #isReadOnly() read-only}
* @throws IndexOutOfBoundsException if {@code dstOffset > dst.byteSize() - B} where {@code B} is the size,
* in bytes, of the substring of {@code src} encoded using the given charset
* @return the number of copied bytes.
*/
@ForceInline
static long copy(String src, Charset dstEncoding, int srcIndex, MemorySegment dst, long dstOffset, int numChars) {
Objects.requireNonNull(src);
Objects.requireNonNull(dstEncoding);
Objects.requireNonNull(dst);
// Validate the source range up front; this form of the check is overflow-safe.
Objects.checkFromIndexSize(srcIndex, numChars, src.length());
return AbstractMemorySegmentImpl.copy(src, dstEncoding, srcIndex, dst, dstOffset, numChars);
}
/**
* Finds and returns the relative offset, in bytes, of the first mismatch between the
* source and the destination segments. More specifically, the bytes at offset

View File

@ -111,7 +111,8 @@ public interface SegmentAllocator {
* If the given string contains any {@code '\0'} characters, they will be
* copied as well. This means that, depending on the method used to read
* the string, such as {@link MemorySegment#getString(long)}, the string
* will appear truncated when read again.
* will appear truncated when read again. The string can be read without
* truncation using {@link MemorySegment#getString(long, Charset, long)}.
*
* @param str the Java string to be converted into a C string
* @param charset the charset used to {@linkplain Charset#newEncoder() encode} the
@ -137,10 +138,10 @@ public interface SegmentAllocator {
int termCharSize = StringSupport.CharsetKind.of(charset).terminatorCharSize();
MemorySegment segment;
int length;
if (StringSupport.bytesCompatible(str, charset)) {
if (StringSupport.bytesCompatible(str, charset, 0, str.length())) {
length = str.length();
segment = allocateNoInit((long) length + termCharSize);
StringSupport.copyToSegmentRaw(str, segment, 0);
StringSupport.copyToSegmentRaw(str, segment, 0, 0, str.length());
} else {
byte[] bytes = str.getBytes(charset);
length = bytes.length;
@ -153,6 +154,53 @@ public interface SegmentAllocator {
return segment;
}
/**
* Encodes a region of a Java string using the provided charset and stores the
* resulting byte array into a memory segment. No terminator is appended.
* <p>
* This method always replaces malformed-input and unmappable-character
* sequences with this charset's default replacement byte array. The
* {@link java.nio.charset.CharsetEncoder} class should be used when more
* control over the encoding process is required.
* <p>
* If the given string contains any {@code '\0'} characters, they will be
* copied as well. This means that, depending on the method used to read
* the string, such as {@link MemorySegment#getString(long)}, the string
* will appear truncated when read again. The string can be read without
* truncation using {@link MemorySegment#getString(long, Charset, long)}.
*
* @param str the Java string to be encoded
* @param charset the charset used to {@linkplain Charset#newEncoder() encode} the
* string bytes
* @param srcIndex the starting index of the source string
* @param numChars the number of characters to be copied
* @return a new native segment containing the encoded string
* @throws IndexOutOfBoundsException if either {@code srcIndex} or {@code numChars} are {@code < 0}
* @throws IndexOutOfBoundsException if {@code srcIndex > str.length() - numChars}
*
* @implSpec The default implementation for this method copies the selected region
* of the provided Java string into a new memory segment obtained by calling
* {@code this.allocate(B)}, where {@code B} is the size, in bytes, of
* that region encoded using the provided charset
* (e.g. {@code str.substring(srcIndex, srcIndex + numChars).getBytes(charset).length}).
*/
@ForceInline
default MemorySegment allocateFrom(String str, Charset charset, int srcIndex, int numChars) {
Objects.requireNonNull(charset);
Objects.requireNonNull(str);
Objects.checkFromIndexSize(srcIndex, numChars, str.length());
MemorySegment segment;
if (StringSupport.bytesCompatible(str, charset, srcIndex, numChars)) {
// Fast path: the string's own bytes are already valid in the target charset,
// so numChars bytes are copied straight into the segment.
segment = allocateNoInit(numChars);
StringSupport.copyToSegmentRaw(str, segment, 0, srcIndex, numChars);
} else {
// Slow path: encode only the requested substring, then copy the encoded bytes.
byte[] bytes = str.substring(srcIndex, srcIndex + numChars).getBytes(charset);
segment = allocateNoInit(bytes.length);
MemorySegment.copy(bytes, 0, segment, ValueLayout.JAVA_BYTE, 0, bytes.length);
}
return segment;
}
/**
* {@return a new memory segment initialized with the provided byte value}
* <p>

View File

@ -634,10 +634,10 @@ public interface JavaLangAccess {
/**
* Copy the string bytes to an existing segment, avoiding intermediate copies.
*/
void copyToSegmentRaw(String string, MemorySegment segment, long offset);
void copyToSegmentRaw(String string, MemorySegment segment, long offset, int srcIndex, int srcLength);
/**
* Are the string bytes compatible with the given charset?
*/
boolean bytesCompatible(String string, Charset charset);
boolean bytesCompatible(String string, Charset charset, int srcIndex, int numChars);
}

View File

@ -551,6 +551,13 @@ public abstract sealed class AbstractMemorySegmentImpl
unsafeGetOffset() == that.unsafeGetOffset();
}
/**
* Reads {@code byteLength} bytes starting at {@code offset} and decodes them with
* {@code charset}. The decoding itself is delegated to {@code StringSupport.read}.
*/
@Override
public String getString(long offset, Charset charset, long byteLength) {
// The length is validated before the charset null check, so a negative length is
// reported first when both arguments are invalid.
Utils.checkNonNegativeArgument(byteLength, "byteLength");
Objects.requireNonNull(charset);
return StringSupport.read(this, offset, charset, byteLength);
}
@Override
public int hashCode() {
return Objects.hash(
@ -702,6 +709,16 @@ public abstract sealed class AbstractMemorySegmentImpl
}
}
/**
 * Encodes {@code numChars} characters of {@code src}, starting at {@code srcIndex},
 * with {@code dstEncoding} and writes the bytes into {@code dst} at {@code dstOffset}.
 * The source range is not validated here.
 *
 * @return the number of bytes written, as reported by {@code StringSupport.copyBytes}
 */
@ForceInline
public static long copy(String src, Charset dstEncoding, int srcIndex, MemorySegment dst, long dstOffset, int numChars) {
    Objects.requireNonNull(src);
    Objects.requireNonNull(dstEncoding);
    Objects.requireNonNull(dst);
    final int bytesWritten = StringSupport.copyBytes(
            src, (AbstractMemorySegmentImpl) dst, dstEncoding, dstOffset, srcIndex, numChars);
    return bytesWritten;
}
// accessors
@ForceInline

View File

@ -30,11 +30,14 @@ import jdk.internal.access.SharedSecrets;
import jdk.internal.misc.ScopedMemoryAccess;
import jdk.internal.util.Architecture;
import jdk.internal.util.ArraysSupport;
import jdk.internal.util.Preconditions;
import jdk.internal.vm.annotation.ForceInline;
import java.lang.foreign.MemorySegment;
import java.lang.reflect.Array;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.util.Objects;
import static java.lang.foreign.ValueLayout.*;
@ -58,6 +61,27 @@ public final class StringSupport {
};
}
/**
* Reads a string of known byte length from {@code segment}. Unlike the
* charset-kind based readers in this class, no terminator search is performed;
* the call is forwarded unchanged to {@code readBytes}.
*/
@ForceInline
public static String read(AbstractMemorySegmentImpl segment, long offset, Charset charset, long length) {
return readBytes(segment, offset, charset, length);
}
/**
 * Copies {@code length} bytes from {@code segment}, starting at {@code offset}, into
 * a fresh byte array and decodes that array into a string using {@code charset}.
 * Malformed input is decoded with the charset's replacement characters.
 *
 * @param segment the segment to read from
 * @param offset  the byte offset, within {@code segment}, of the first byte to read
 * @param charset the charset used to decode the bytes
 * @param length  the number of bytes to read
 * @return the decoded string
 * @throws IllegalArgumentException  if {@code length > Integer.MAX_VALUE}
 * @throws IndexOutOfBoundsException if {@code offset} and {@code length} do not denote
 *                                   a range within {@code segment}
 */
@ForceInline
public static String readBytes(AbstractMemorySegmentImpl segment, long offset, Charset charset, long length) {
    if (length > Integer.MAX_VALUE) {
        throw new IllegalArgumentException("Required length exceeds implementation limit");
    }
    // Validate the requested range BEFORE allocating the intermediate array. Otherwise
    // an out-of-bounds request with a large length would first try to allocate up to
    // 2 GiB, only to be rejected by the copy afterwards.
    Objects.checkFromIndexSize(offset, length, segment.byteSize());
    final int lengthBytes = (int) length;
    final byte[] bytes = new byte[lengthBytes];
    MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, lengthBytes);
    try {
        return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset);
    } catch (CharacterCodingException _) {
        // use replacement characters for malformed input
        return new String(bytes, charset);
    }
}
@ForceInline
public static void write(AbstractMemorySegmentImpl segment, long offset, Charset charset, String string) {
switch (CharsetKind.of(charset)) {
@ -70,14 +94,7 @@ public final class StringSupport {
@ForceInline
private static String readByte(AbstractMemorySegmentImpl segment, long offset, Charset charset) {
final int len = strlenByte(segment, offset, segment.byteSize());
final byte[] bytes = new byte[len];
MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len);
try {
return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset);
} catch (CharacterCodingException _) {
// use replacement characters for malformed input
return new String(bytes, charset);
}
return readBytes(segment, offset, charset, len);
}
@ForceInline
@ -89,14 +106,7 @@ public final class StringSupport {
@ForceInline
private static String readShort(AbstractMemorySegmentImpl segment, long offset, Charset charset) {
int len = strlenShort(segment, offset, segment.byteSize());
byte[] bytes = new byte[len];
MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len);
try {
return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset);
} catch (CharacterCodingException _) {
// use replacement characters for malformed input
return new String(bytes, charset);
}
return readBytes(segment, offset, charset, len);
}
@ForceInline
@ -108,14 +118,7 @@ public final class StringSupport {
@ForceInline
private static String readInt(AbstractMemorySegmentImpl segment, long offset, Charset charset) {
int len = strlenInt(segment, offset, segment.byteSize());
byte[] bytes = new byte[len];
MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len);
try {
return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset);
} catch (CharacterCodingException _) {
// use replacement characters for malformed input
return new String(bytes, charset);
}
return readBytes(segment, offset, charset, len);
}
@ForceInline
@ -345,22 +348,26 @@ public final class StringSupport {
}
}
public static boolean bytesCompatible(String string, Charset charset) {
return JAVA_LANG_ACCESS.bytesCompatible(string, charset);
public static boolean bytesCompatible(String string, Charset charset, int srcIndex, int numChars) {
return JAVA_LANG_ACCESS.bytesCompatible(string, charset, srcIndex, numChars);
}
public static int copyBytes(String string, MemorySegment segment, Charset charset, long offset) {
if (bytesCompatible(string, charset)) {
copyToSegmentRaw(string, segment, offset);
return string.length();
return copyBytes(string, segment, charset, offset, 0, string.length());
}
public static int copyBytes(String string, MemorySegment segment, Charset charset, long offset, int srcIndex, int numChars) {
if (bytesCompatible(string, charset, srcIndex, numChars)) {
copyToSegmentRaw(string, segment, offset, srcIndex, numChars);
return numChars;
} else {
byte[] bytes = string.getBytes(charset);
byte[] bytes = string.substring(srcIndex, srcIndex + numChars).getBytes(charset);
MemorySegment.copy(bytes, 0, segment, JAVA_BYTE, offset, bytes.length);
return bytes.length;
}
}
public static void copyToSegmentRaw(String string, MemorySegment segment, long offset) {
JAVA_LANG_ACCESS.copyToSegmentRaw(string, segment, offset);
public static void copyToSegmentRaw(String string, MemorySegment segment, long offset, int srcIndex, int srcLength) {
JAVA_LANG_ACCESS.copyToSegmentRaw(string, segment, offset, srcIndex, srcLength);
}
}

View File

@ -37,6 +37,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import java.util.Set;
import java.util.function.UnaryOperator;
import jdk.internal.foreign.AbstractMemorySegmentImpl;
@ -102,6 +103,140 @@ public class TestStringEncoding {
}
}
/**
 * For every charset that supports encoding, allocates the whole test string with the
 * length-based {@code allocateFrom} overload, checks the segment size against
 * {@code String.getBytes}, and verifies the round trip through the length-based
 * {@code getString} whenever the charset can encode the string.
 */
@Test(dataProvider = "strings")
public void testStringsLength(String testString) {
    if (testString.isEmpty()) {
        return;
    }
    for (Charset charset : Charset.availableCharsets().values()) {
        if (!charset.canEncode()) {
            continue;
        }
        for (Arena arena : arenas()) {
            try (arena) {
                MemorySegment encoded = arena.allocateFrom(testString, charset, 0, testString.length());
                long byteCount = encoded.byteSize();
                assertEquals(byteCount, testString.getBytes(charset).length);
                String decoded = encoded.getString(0, charset, byteCount);
                // Only an encodable string is expected to survive the round trip.
                if (charset.newEncoder().canEncode(testString)) {
                    assertEquals(decoded, testString);
                }
            }
        }
    }
}
/**
 * For every charset that supports encoding, copies the whole test string into a
 * pre-sized segment with {@code MemorySegment.copy(String, ...)} and verifies the
 * round trip through the length-based {@code getString} whenever the charset can
 * encode the string.
 */
@Test(dataProvider = "strings")
public void testStringsCopy(String testString) {
    if (testString.isEmpty()) {
        return;
    }
    for (Charset charset : Charset.availableCharsets().values()) {
        if (!charset.canEncode()) {
            continue;
        }
        for (Arena arena : arenas()) {
            try (arena) {
                byte[] expectedBytes = testString.getBytes(charset);
                MemorySegment target = arena.allocate(JAVA_BYTE, expectedBytes.length);
                MemorySegment.copy(testString, charset, 0, target, 0, testString.length());
                String decoded = target.getString(0, charset, expectedBytes.length);
                // Only an encodable string is expected to survive the round trip.
                if (charset.newEncoder().canEncode(testString)) {
                    assertEquals(decoded, testString);
                }
            }
        }
    }
}
/**
* Verifies that the length-based {@code getString} rejects a negative byte length
* with {@link IllegalArgumentException}, even when the offset itself is in bounds.
*/
@Test
public void testStringsLengthNegative() {
try (Arena arena = Arena.ofConfined()) {
var segment = arena.allocateFrom("abc");
assertThrows(IllegalArgumentException.class, () -> segment.getString(1, StandardCharsets.UTF_8, -1));
}
}
/**
 * Verifies the argument checks of {@code MemorySegment.copy(String, Charset, int,
 * MemorySegment, long, int)}: after two valid copies (one bytes-compatible string, one
 * not), each invalid combination of {@code srcIndex}, {@code dstOffset} and
 * {@code numChars} must raise {@link IndexOutOfBoundsException}.
 */
@Test
public void testCopyThrows() {
    try (Arena arena = Arena.ofConfined()) {
        String testString = "abc";
        String testString_notBytesCompatible = "snowman \u26C4";
        MemorySegment text = arena.allocate(JAVA_BYTE, 3);
        MemorySegment text_notBytesCompatible = arena.allocate(JAVA_BYTE,
                testString_notBytesCompatible.getBytes(StandardCharsets.UTF_8).length);
        // sanity: the valid calls must succeed
        MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, 0, testString.length());
        MemorySegment.copy(testString_notBytesCompatible, StandardCharsets.UTF_8, 0,
                text_notBytesCompatible, 0,
                testString_notBytesCompatible.length());
        // srcIndex < 0
        assertThrows(IndexOutOfBoundsException.class, () ->
                MemorySegment.copy(testString, StandardCharsets.UTF_8, -1, text, 0, testString.length()));
        // dstOffset < 0
        assertThrows(IndexOutOfBoundsException.class, () ->
                MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, -1, testString.length()));
        // numChars < 0
        assertThrows(IndexOutOfBoundsException.class, () ->
                MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, 0, -1));
        // srcIndex + numChars > length
        assertThrows(IndexOutOfBoundsException.class, () ->
                MemorySegment.copy(testString, StandardCharsets.UTF_8, 1, text, 0, testString.length()));
        assertThrows(IndexOutOfBoundsException.class, () ->
                MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, 0, testString.length() + 1));
        // dstOffset > byteSize() - B
        assertThrows(IndexOutOfBoundsException.class, () ->
                MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, 1, testString.length()));
        // srcIndex + numChars overflows: both arguments are non-negative and srcIndex is
        // in range, but their int sum wraps around. (Passing Integer.MAX_VALUE + 3 as
        // numChars would itself wrap to a negative value and merely repeat the
        // "numChars < 0" case above.)
        assertThrows(IndexOutOfBoundsException.class, () ->
                MemorySegment.copy(testString, StandardCharsets.UTF_8, 3, text, 0, Integer.MAX_VALUE));
        assertThrows(IndexOutOfBoundsException.class, () ->
                MemorySegment.copy(testString_notBytesCompatible, StandardCharsets.UTF_8, 3, text, 0, Integer.MAX_VALUE));
    }
}
/**
 * Verifies the range checks of the substring-based {@code allocateFrom} overload:
 * two valid calls must succeed, and every invalid {@code srcIndex}/{@code numChars}
 * combination must raise {@link IndexOutOfBoundsException}.
 */
@Test
public void testAllocateFromThrows() {
    try (Arena arena = Arena.ofConfined()) {
        final Charset utf8 = StandardCharsets.UTF_8;
        final String testString = "abc";
        final String testString_notBytesCompatible = "snowman \u26C4";
        final int fullLength = testString.length();

        // sanity: valid ranges are accepted
        arena.allocateFrom(testString, utf8, 0, testString.length());
        arena.allocateFrom(testString, utf8, 2, 1);

        // srcIndex < 0
        assertThrows(IndexOutOfBoundsException.class,
                () -> arena.allocateFrom(testString, utf8, -1, fullLength));
        // numChars < 0
        assertThrows(IndexOutOfBoundsException.class,
                () -> arena.allocateFrom(testString, utf8, 0, -1));
        // srcIndex + numChars > length
        assertThrows(IndexOutOfBoundsException.class,
                () -> arena.allocateFrom(testString, utf8, 0, fullLength + 1));
        assertThrows(IndexOutOfBoundsException.class,
                () -> arena.allocateFrom(testString, utf8, 1, fullLength));
        // srcIndex + numChars overflows
        assertThrows(IndexOutOfBoundsException.class,
                () -> arena.allocateFrom(testString, utf8, 3, Integer.MAX_VALUE));
        assertThrows(IndexOutOfBoundsException.class,
                () -> arena.allocateFrom(testString_notBytesCompatible, utf8, 3, Integer.MAX_VALUE));
    }
}
/**
 * Verifies the argument checks of the length-based {@code getString}: a valid read
 * must succeed, an oversized or negative length must raise
 * {@link IllegalArgumentException}, and an out-of-bounds range must raise
 * {@link IndexOutOfBoundsException}.
 */
@Test
public void testGetStringThrows() {
    try (Arena arena = Arena.ofConfined()) {
        final Charset utf8 = StandardCharsets.UTF_8;
        final String testString = "abc";
        final MemorySegment text = arena.allocateFrom(testString, utf8, 0, testString.length());

        // sanity: reading the whole segment is fine
        text.getString(0, utf8, 3);

        // unsupported string size
        assertThrows(IllegalArgumentException.class,
                () -> text.getString(0, utf8, Integer.MAX_VALUE + 1L));
        // offset < 0
        assertThrows(IndexOutOfBoundsException.class,
                () -> text.getString(-1, utf8, 3));
        // offset > byteSize() - length
        assertThrows(IndexOutOfBoundsException.class,
                () -> text.getString(1, utf8, 3));
        // length < 0
        assertThrows(IllegalArgumentException.class,
                () -> text.getString(0, utf8, -1));
    }
}
@Test(dataProvider = "strings")
public void testStringsHeap(String testString) {
for (Charset charset : singleByteCharsets()) {
@ -221,6 +356,74 @@ public class TestStringEncoding {
}
}
/**
 * Allocates the whole test string once per single-byte charset and arena, then reads
 * back every possible (offset, length) window with the length-based {@code getString}
 * and compares it with the matching {@code String.substring}.
 */
@Test(dataProvider = "strings")
public void testSubstringGetString(String testString) {
    boolean tooShort = testString.length() < 3;
    if (tooShort || !containsOnlyRegularCharacters(testString)) {
        return;
    }
    for (var charset : singleByteCharsets()) {
        for (var arena : arenas()) {
            try (arena) {
                MemorySegment text = arena.allocateFrom(testString, charset, 0, testString.length());
                for (int start = 0; start <= testString.length(); start++) {
                    for (int count = 0; count <= testString.length() - start; count++) {
                        // Single-byte charsets only: character indices equal byte offsets.
                        String actual = text.getString(start, charset, count);
                        String expected = testString.substring(start, start + count);
                        assertEquals(actual, expected);
                    }
                }
            }
        }
    }
}
/**
 * For every single-byte charset and arena, allocates each possible substring window
 * with the substring-based {@code allocateFrom}, and checks both the segment size and
 * the decoded contents against {@code String.substring}.
 */
@Test(dataProvider = "strings")
public void testSubstringAllocate(String testString) {
    boolean tooShort = testString.length() < 3;
    if (tooShort || !containsOnlyRegularCharacters(testString)) {
        return;
    }
    for (var charset : singleByteCharsets()) {
        for (var arena : arenas()) {
            try (arena) {
                for (int start = 0; start <= testString.length(); start++) {
                    for (int count = 0; count <= testString.length() - start; count++) {
                        MemorySegment text = arena.allocateFrom(testString, charset, start, count);
                        String expected = testString.substring(start, start + count);
                        assertEquals(text.byteSize(), expected.getBytes(charset).length);
                        String actual = text.getString(0, charset, text.byteSize());
                        assertEquals(actual, expected);
                    }
                }
            }
        }
    }
}
/**
 * For every single-byte charset and arena, copies each possible substring window into
 * an exactly-sized segment with {@code MemorySegment.copy(String, ...)}, and checks
 * the decoded contents and the reported byte count against {@code String.substring}.
 */
@Test(dataProvider = "strings")
public void testSubstringCopy(String testString) {
    boolean tooShort = testString.length() < 3;
    if (tooShort || !containsOnlyRegularCharacters(testString)) {
        return;
    }
    for (var charset : singleByteCharsets()) {
        for (var arena : arenas()) {
            try (arena) {
                for (int start = 0; start <= testString.length(); start++) {
                    for (int count = 0; count <= testString.length() - start; count++) {
                        String expected = testString.substring(start, start + count);
                        long expectedBytes = expected.getBytes(charset).length;
                        MemorySegment text = arena.allocate(JAVA_BYTE, expectedBytes);
                        long copied = MemorySegment.copy(testString, charset, start, text, 0, count);
                        String actual = text.getString(0, charset, expectedBytes);
                        assertEquals(actual, expected);
                        assertEquals(copied, expectedBytes);
                    }
                }
            }
        }
    }
}
private static final MemoryLayout CHAR_POINTER = ADDRESS
.withTargetLayout(MemoryLayout.sequenceLayout(Long.MAX_VALUE, JAVA_BYTE));
private static final Linker LINKER = Linker.nativeLinker();
@ -402,7 +605,7 @@ public class TestStringEncoding {
{""},
{"X"},
{"12345"},
{"yen \u00A5"},
{"section \u00A7"},
{"snowman \u26C4"},
{"rainbow \uD83C\uDF08"},
{"0"},

View File

@ -0,0 +1,94 @@
/*
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.java.lang.foreign;
import static java.lang.foreign.ValueLayout.JAVA_BYTE;
import static java.nio.charset.StandardCharsets.UTF_8;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.util.concurrent.TimeUnit;
/**
 * Benchmarks three ways of writing a Java string into an existing memory segment:
 * {@code MemorySegment.setString}, the string-aware {@code MemorySegment.copy}
 * overload, and an explicit {@code String.getBytes} followed by a byte-array copy.
 */
@BenchmarkMode(Mode.AverageTime)
@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@State(Scope.Benchmark)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Fork(value = 3)
public class FromJavaStringTest {

    /** The string written by every benchmark; the first {@code size} characters of the sample text. */
    private String str;

    /** Destination segment, sized by {@code allocateFrom(str)} so every benchmark's output fits. */
    private MemorySegment strSegment;

    @Param({"5", "20", "100", "200", "451"})
    int size;

    @Setup
    public void setup() {
        var arena = Arena.ofAuto();
        // Build a text of at least `size` characters locally instead of growing the
        // shared static sample text, so setup has no side effects outside this instance.
        int copies = Math.max(1, (size + LOREM.length() - 1) / LOREM.length());
        str = LOREM.repeat(copies).substring(0, size);
        strSegment = arena.allocateFrom(str);
    }

    @Benchmark
    public void segment_setString() {
        strSegment.setString(0, str, UTF_8);
    }

    @Benchmark
    public void segment_copyStringRaw() {
        MemorySegment.copy(str, UTF_8, 0, strSegment, 0, str.length());
    }

    @Benchmark
    public void segment_copyStringBytes() {
        byte[] bytes = str.getBytes(UTF_8);
        MemorySegment.copy(bytes, 0, strSegment, JAVA_BYTE, 0, bytes.length);
    }

    static final String LOREM =
            """
            Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et
            dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip
            ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
            fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt
            mollit anim id est laborum.
            """;
}

View File

@ -22,6 +22,9 @@
*/
package org.openjdk.bench.java.lang.foreign;
import static java.lang.foreign.ValueLayout.JAVA_BYTE;
import static java.nio.charset.StandardCharsets.UTF_8;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
@ -47,6 +50,7 @@ import java.util.concurrent.TimeUnit;
public class ToJavaStringTest {
private MemorySegment strSegment;
private int length;
@Param({"5", "20", "100", "200", "451"})
int size;
@ -61,19 +65,33 @@ public class ToJavaStringTest {
while (LOREM.length() < size) {
LOREM += LOREM;
}
strSegment = arena.allocateFrom(LOREM.substring(0, size));
var s = LOREM.substring(0, size);
strSegment = arena.allocateFrom(s);
length = s.getBytes(UTF_8).length;
}
@Benchmark
public String panama_readString() {
public String segment_getString() {
return strSegment.getString(0);
}
@Benchmark
public String segment_getStringLength() {
return strSegment.getString(0, UTF_8, length);
}
@Benchmark
public String jni_readString() {
return readString(strSegment.address());
}
@Benchmark
public String segment_copyStringBytes() {
byte[] bytes = new byte[length];
MemorySegment.copy(strSegment, JAVA_BYTE, 0, bytes, 0, length);
return new String(bytes, UTF_8);
}
static native String readString(long addr);
static String LOREM = """