8369564: Provide a MemorySegment API to read strings with known lengths

Co-authored-by: Per Minborg <pminborg@openjdk.org> Reviewed-by: jvernee, mcimadamore
2026-03-14 09:53:18 +00:00 · 2026-01-12 15:22:42 +00:00 · 2026-01-12 15:22:42 +00:00 · d433ce5236
commit d433ce5236
parent 556bddfd94
10 changed files with 523 additions and 55 deletions
--- a/src/java.base/share/classes/java/lang/String.java
+++ b/src/java.base/share/classes/java/lang/String.java
@ -2045,19 +2045,26 @@ public final class String
        return encode(Charset.defaultCharset(), coder(), value);
    }

-    boolean bytesCompatible(Charset charset) {
+    boolean bytesCompatible(Charset charset, int srcIndex, int numChars) {
        if (isLatin1()) {
            if (charset == ISO_8859_1.INSTANCE) {
                return true; // ok, same encoding
            } else if (charset == UTF_8.INSTANCE || charset == US_ASCII.INSTANCE) {
-                return !StringCoding.hasNegatives(value, 0, value.length); // ok, if ASCII-compatible
+                return !StringCoding.hasNegatives(value, srcIndex, numChars); // ok, if ASCII-compatible
            }
        }
        return false;
    }

-    void copyToSegmentRaw(MemorySegment segment, long offset) {
-        MemorySegment.copy(value, 0, segment, ValueLayout.JAVA_BYTE, offset, value.length);
+    void copyToSegmentRaw(MemorySegment segment, long offset, int srcIndex, int srcLength) {
+        if (!isLatin1()) {
+            // This method is intended to be used together with bytesCompatible, which currently only supports
+            // latin1 strings. In the future, bytesCompatible could be updated to handle more cases, like
+            // UTF-16 strings (when the platform and charset endianness match, and the String doesn't contain
+            // unpaired surrogates). If that happens, copyToSegmentRaw should also be updated.
+            throw new IllegalStateException("This string does not support copyToSegmentRaw");
+        }
+        MemorySegment.copy(value, srcIndex, segment, ValueLayout.JAVA_BYTE, offset, srcLength);
    }

    /**
--- a/src/java.base/share/classes/java/lang/System.java
+++ b/src/java.base/share/classes/java/lang/System.java
@ -2331,13 +2331,13 @@ public final class System {
            }

            @Override
-            public void copyToSegmentRaw(String string, MemorySegment segment, long offset) {
-                string.copyToSegmentRaw(segment, offset);
+            public void copyToSegmentRaw(String string, MemorySegment segment, long offset, int srcIndex, int srcLength) {
+                string.copyToSegmentRaw(segment, offset, srcIndex, srcLength);
            }

            @Override
-            public boolean bytesCompatible(String string, Charset charset) {
-                return string.bytesCompatible(charset);
+            public boolean bytesCompatible(String string, Charset charset, int srcIndex, int numChars) {
+                return string.bytesCompatible(charset, srcIndex, numChars);
            }
        });
    }
--- a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java
+++ b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java
@ -1296,12 +1296,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
     * over the decoding process is required.
     * <p>
     * Getting a string from a segment with a known byte offset and
-     * known byte length can be done like so:
-     * {@snippet lang=java :
-     *     byte[] bytes = new byte[length];
-     *     MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, length);
-     *     return new String(bytes, charset);
-     * }
+     * known byte length can be done using {@link #getString(long, Charset, long)}.
     *
     * @param offset  offset in bytes (relative to this segment address) at which this
     *                access operation will occur
@ -1328,6 +1323,40 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
     */
    String getString(long offset, Charset charset);

+    /**
+     * Reads a string from this segment at the given offset, using the provided length
+     * and charset.
+     * <p>
+     * This method always replaces malformed-input and unmappable-character
+     * sequences with this charset's default replacement string. The {@link
+     * java.nio.charset.CharsetDecoder} class should be used when more control
+     * over the decoding process is required.
+     * <p>
+     * If the string contains any {@code '\0'} characters, they will be read as well.
+     * This differs from {@link #getString(long, Charset)}, which will only read up
+     * to the first {@code '\0'}, resulting in truncation for string data that contains
+     * the {@code '\0'} character.
+     *
+     * @param offset  offset in bytes (relative to this segment address) at which this
+     *                access operation will occur
+     * @param charset the charset used to {@linkplain Charset#newDecoder() decode} the
+     *                string bytes
+     * @param byteLength length, in bytes, of the region of memory to read and decode into
+     *                a string
+     * @return a Java string constructed from the bytes read from the given starting
+     *         address up to the given length
+     * @throws IllegalArgumentException  if the size of the string is greater than the
+     *         largest string supported by the platform
+     * @throws IndexOutOfBoundsException if {@code offset < 0}
+     * @throws IndexOutOfBoundsException if {@code offset > byteSize() - byteLength}
+     * @throws IllegalStateException if the {@linkplain #scope() scope} associated with
+     *         this segment is not {@linkplain Scope#isAlive() alive}
+     * @throws WrongThreadException if this method is called from a thread {@code T},
+     *         such that {@code isAccessibleBy(T) == false}
+     * @throws IllegalArgumentException if {@code byteLength < 0}
+     */
+    String getString(long offset, Charset charset, long byteLength);
+
    /**
     * Writes the given string into this segment at the given offset, converting it to
     * a null-terminated byte sequence using the {@linkplain StandardCharsets#UTF_8 UTF-8}
@ -1366,7 +1395,8 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
     * If the given string contains any {@code '\0'} characters, they will be
     * copied as well. This means that, depending on the method used to read
     * the string, such as {@link MemorySegment#getString(long)}, the string
-     * will appear truncated when read again.
+     * will appear truncated when read again. The string can be read without
+     * truncation using {@link #getString(long, Charset, long)}.
     *
     * @param offset  offset in bytes (relative to this segment address) at which this
     *                access operation will occur, the final address of this write
@ -2606,6 +2636,50 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
                elementCount);
    }

+    /**
+     * Copies the byte sequence of the given string encoded using the provided charset
+     * to the destination segment.
+     * <p>
+     * This method always replaces malformed-input and unmappable-character
+     * sequences with this charset's default replacement byte array. The {@link
+     * java.nio.charset.CharsetEncoder} class should be used when more control
+     * over the encoding process is required.
+     * <p>
+     * If the given string contains any {@code '\0'} characters, they will be
+     * copied as well. This means that, depending on the method used to read
+     * the string, such as {@link MemorySegment#getString(long)}, the string
+     * will appear truncated when read again. The string can be read without
+     * truncation using {@link #getString(long, Charset, long)}.
+     *
+     * @param src      the Java string to be written into the destination segment
+     * @param dstEncoding the charset used to {@linkplain Charset#newEncoder() encode}
+     *                 the string bytes.
+     * @param srcIndex the starting character index of the source string
+     * @param dst      the destination segment
+     * @param dstOffset the starting offset, in bytes, of the destination segment
+     * @param numChars the number of characters to be copied
+     * @throws IllegalStateException if the {@linkplain #scope() scope} associated with
+     *         {@code dst} is not {@linkplain Scope#isAlive() alive}
+     * @throws WrongThreadException if this method is called from a thread {@code T},
+     *         such that {@code dst.isAccessibleBy(T) == false}
+     * @throws IndexOutOfBoundsException if any of {@code srcIndex}, {@code numChars}, or {@code dstOffset}
+     *         is {@code < 0}
+     * @throws IndexOutOfBoundsException if {@code srcIndex > src.length() - numChars}
+     * @throws IllegalArgumentException if {@code dst} is {@linkplain #isReadOnly() read-only}
+     * @throws IndexOutOfBoundsException if {@code dstOffset > dst.byteSize() - B} where {@code B} is the size,
+     *         in bytes, of the substring of {@code src} encoded using the given charset
+     * @return the number of copied bytes.
+     */
+    @ForceInline
+    static long copy(String src, Charset dstEncoding, int srcIndex, MemorySegment dst, long dstOffset, int numChars) {
+        Objects.requireNonNull(src);
+        Objects.requireNonNull(dstEncoding);
+        Objects.requireNonNull(dst);
+        Objects.checkFromIndexSize(srcIndex, numChars, src.length());
+
+        return AbstractMemorySegmentImpl.copy(src, dstEncoding, srcIndex, dst, dstOffset, numChars);
+    }
+
    /**
     * Finds and returns the relative offset, in bytes, of the first mismatch between the
     * source and the destination segments. More specifically, the bytes at offset
--- a/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java
+++ b/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java
@ -111,7 +111,8 @@ public interface SegmentAllocator {
     * If the given string contains any {@code '\0'} characters, they will be
     * copied as well. This means that, depending on the method used to read
     * the string, such as {@link MemorySegment#getString(long)}, the string
-     * will appear truncated when read again.
+     * will appear truncated when read again. The string can be read without
+     * truncation using {@link MemorySegment#getString(long, Charset, long)}.
     *
     * @param str     the Java string to be converted into a C string
     * @param charset the charset used to {@linkplain Charset#newEncoder() encode} the
@ -137,10 +138,10 @@ public interface SegmentAllocator {
        int termCharSize = StringSupport.CharsetKind.of(charset).terminatorCharSize();
        MemorySegment segment;
        int length;
-        if (StringSupport.bytesCompatible(str, charset)) {
+        if (StringSupport.bytesCompatible(str, charset, 0, str.length())) {
            length = str.length();
            segment = allocateNoInit((long) length + termCharSize);
-            StringSupport.copyToSegmentRaw(str, segment, 0);
+            StringSupport.copyToSegmentRaw(str, segment, 0, 0, str.length());
        } else {
            byte[] bytes = str.getBytes(charset);
            length = bytes.length;
@ -153,6 +154,53 @@ public interface SegmentAllocator {
        return segment;
    }

+    /**
+     * Encodes a substring of a Java string using the provided charset and stores the
+     * resulting byte array into a memory segment.
+     * <p>
+     * This method always replaces malformed-input and unmappable-character
+     * sequences with this charset's default replacement byte array. The
+     * {@link java.nio.charset.CharsetEncoder} class should be used when more
+     * control over the encoding process is required.
+     * <p>
+     * If the given string contains any {@code '\0'} characters, they will be
+     * copied as well. This means that, depending on the method used to read
+     * the string, such as {@link MemorySegment#getString(long)}, the string
+     * will appear truncated when read again. The string can be read without
+     * truncation using {@link MemorySegment#getString(long, Charset, long)}.
+     *
+     * @param str      the Java string to be encoded
+     * @param charset  the charset used to {@linkplain Charset#newEncoder() encode} the
+     *                 string bytes
+     * @param srcIndex the starting index of the source string
+     * @param numChars the number of characters to be copied
+     * @return a new native segment containing the encoded string
+     * @throws IndexOutOfBoundsException if either {@code srcIndex} or {@code numChars} is {@code < 0}
+     * @throws IndexOutOfBoundsException if {@code srcIndex > str.length() - numChars}
+     *
+     * @implSpec The default implementation for this method copies the selected
+     *           substring of the provided Java string into a new memory segment obtained
+     *           by calling {@code this.allocate(B)}, where {@code B} is the size, in bytes,
+     *           of that substring encoded using the provided charset (e.g.
+     *           {@code str.substring(srcIndex, srcIndex + numChars).getBytes(charset).length}).
+     */
+    @ForceInline
+    default MemorySegment allocateFrom(String str, Charset charset, int srcIndex, int numChars) {
+        Objects.requireNonNull(charset);
+        Objects.requireNonNull(str);
+        Objects.checkFromIndexSize(srcIndex, numChars, str.length());
+        MemorySegment segment;
+        if (StringSupport.bytesCompatible(str, charset, srcIndex, numChars)) {
+            segment = allocateNoInit(numChars);
+            StringSupport.copyToSegmentRaw(str, segment, 0, srcIndex, numChars);
+        } else {
+            byte[] bytes = str.substring(srcIndex, srcIndex + numChars).getBytes(charset);
+            segment = allocateNoInit(bytes.length);
+            MemorySegment.copy(bytes, 0, segment, ValueLayout.JAVA_BYTE, 0, bytes.length);
+        }
+        return segment;
+    }
+
    /**
     * {@return a new memory segment initialized with the provided byte value}
     * <p>
--- a/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java
+++ b/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java
@ -634,10 +634,10 @@ public interface JavaLangAccess {
    /**
     * Copy the string bytes to an existing segment, avoiding intermediate copies.
     */
-    void copyToSegmentRaw(String string, MemorySegment segment, long offset);
+    void copyToSegmentRaw(String string, MemorySegment segment, long offset, int srcIndex, int srcLength);

    /**
     * Are the string bytes compatible with the given charset?
     */
-    boolean bytesCompatible(String string, Charset charset);
+    boolean bytesCompatible(String string, Charset charset, int srcIndex, int numChars);
 }
--- a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java
+++ b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java
@ -551,6 +551,13 @@ public abstract sealed class AbstractMemorySegmentImpl
                unsafeGetOffset() == that.unsafeGetOffset();
    }

+    @Override
+    public String getString(long offset, Charset charset, long byteLength) {
+        Utils.checkNonNegativeArgument(byteLength, "byteLength");
+        Objects.requireNonNull(charset);
+        return StringSupport.read(this, offset, charset, byteLength);
+    }
+
    @Override
    public int hashCode() {
        return Objects.hash(
@ -702,6 +709,16 @@ public abstract sealed class AbstractMemorySegmentImpl
        }
    }

+    @ForceInline
+    public static long copy(String src, Charset dstEncoding, int srcIndex, MemorySegment dst, long dstOffset, int numChars) {
+        Objects.requireNonNull(src);
+        Objects.requireNonNull(dstEncoding);
+        Objects.requireNonNull(dst);
+
+        AbstractMemorySegmentImpl destImpl = (AbstractMemorySegmentImpl)dst;
+        return StringSupport.copyBytes(src, destImpl, dstEncoding, dstOffset, srcIndex, numChars);
+    }
+
    // accessors

    @ForceInline
--- a/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java
+++ b/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java
@ -30,11 +30,14 @@ import jdk.internal.access.SharedSecrets;
 import jdk.internal.misc.ScopedMemoryAccess;
 import jdk.internal.util.Architecture;
 import jdk.internal.util.ArraysSupport;
+import jdk.internal.util.Preconditions;
 import jdk.internal.vm.annotation.ForceInline;

 import java.lang.foreign.MemorySegment;
+import java.lang.reflect.Array;
 import java.nio.charset.CharacterCodingException;
 import java.nio.charset.Charset;
+import java.util.Objects;

 import static java.lang.foreign.ValueLayout.*;

@ -58,6 +61,27 @@ public final class StringSupport {
        };
    }

+    @ForceInline
+    public static String read(AbstractMemorySegmentImpl segment, long offset, Charset charset, long length) {
+        return readBytes(segment, offset, charset, length);
+    }
+
+    @ForceInline
+    public static String readBytes(AbstractMemorySegmentImpl segment, long offset, Charset charset, long length) {
+        if (length > Integer.MAX_VALUE) {
+            throw new IllegalArgumentException("Required length exceeds implementation limit");
+        }
+        final int lengthBytes = (int) length;
+        final byte[] bytes = new byte[lengthBytes];
+        MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, lengthBytes);
+        try {
+            return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset);
+        } catch (CharacterCodingException _) {
+            // use replacement characters for malformed input
+            return new String(bytes, charset);
+        }
+    }
+
    @ForceInline
    public static void write(AbstractMemorySegmentImpl segment, long offset, Charset charset, String string) {
        switch (CharsetKind.of(charset)) {
@ -70,14 +94,7 @@ public final class StringSupport {
    @ForceInline
    private static String readByte(AbstractMemorySegmentImpl segment, long offset, Charset charset) {
        final int len = strlenByte(segment, offset, segment.byteSize());
-        final byte[] bytes = new byte[len];
-        MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len);
-        try {
-            return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset);
-        } catch (CharacterCodingException _) {
-            // use replacement characters for malformed input
-            return new String(bytes, charset);
-        }
+        return readBytes(segment, offset, charset, len);
    }

    @ForceInline
@ -89,14 +106,7 @@ public final class StringSupport {
    @ForceInline
    private static String readShort(AbstractMemorySegmentImpl segment, long offset, Charset charset) {
        int len = strlenShort(segment, offset, segment.byteSize());
-        byte[] bytes = new byte[len];
-        MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len);
-        try {
-            return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset);
-        } catch (CharacterCodingException _) {
-          // use replacement characters for malformed input
-          return new String(bytes, charset);
-        }
+        return readBytes(segment, offset, charset, len);
    }

    @ForceInline
@ -108,14 +118,7 @@ public final class StringSupport {
    @ForceInline
    private static String readInt(AbstractMemorySegmentImpl segment, long offset, Charset charset) {
        int len = strlenInt(segment, offset, segment.byteSize());
-        byte[] bytes = new byte[len];
-        MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len);
-        try {
-            return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset);
-        } catch (CharacterCodingException _) {
-            // use replacement characters for malformed input
-            return new String(bytes, charset);
-        }
+        return readBytes(segment, offset, charset, len);
    }

    @ForceInline
@ -345,22 +348,26 @@ public final class StringSupport {
        }
    }

-    public static boolean bytesCompatible(String string, Charset charset) {
-        return JAVA_LANG_ACCESS.bytesCompatible(string, charset);
+    public static boolean bytesCompatible(String string, Charset charset, int srcIndex, int numChars) {
+        return JAVA_LANG_ACCESS.bytesCompatible(string, charset, srcIndex, numChars);
    }

    public static int copyBytes(String string, MemorySegment segment, Charset charset, long offset) {
-        if (bytesCompatible(string, charset)) {
-            copyToSegmentRaw(string, segment, offset);
-            return string.length();
+        return copyBytes(string, segment, charset, offset, 0, string.length());
+    }
+
+    public static int copyBytes(String string, MemorySegment segment, Charset charset, long offset, int srcIndex, int numChars) {
+        if (bytesCompatible(string, charset, srcIndex, numChars)) {
+            copyToSegmentRaw(string, segment, offset, srcIndex, numChars);
+            return numChars;
        } else {
-            byte[] bytes = string.getBytes(charset);
+            byte[] bytes = string.substring(srcIndex, srcIndex + numChars).getBytes(charset);
            MemorySegment.copy(bytes, 0, segment, JAVA_BYTE, offset, bytes.length);
            return bytes.length;
        }
    }

-    public static void copyToSegmentRaw(String string, MemorySegment segment, long offset) {
-        JAVA_LANG_ACCESS.copyToSegmentRaw(string, segment, offset);
+    public static void copyToSegmentRaw(String string, MemorySegment segment, long offset, int srcIndex, int srcLength) {
+        JAVA_LANG_ACCESS.copyToSegmentRaw(string, segment, offset, srcIndex, srcLength);
    }
 }
--- a/test/jdk/java/foreign/TestStringEncoding.java
+++ b/test/jdk/java/foreign/TestStringEncoding.java
@ -37,6 +37,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Random;
+import java.util.Set;
 import java.util.function.UnaryOperator;

 import jdk.internal.foreign.AbstractMemorySegmentImpl;
@ -102,6 +103,140 @@ public class TestStringEncoding {
        }
    }

+    @Test(dataProvider = "strings")
+    public void testStringsLength(String testString) {
+        if (!testString.isEmpty()) {
+            for (Charset charset : Charset.availableCharsets().values()) {
+                if (charset.canEncode()) {
+                    for (Arena arena : arenas()) {
+                        try (arena) {
+                            MemorySegment text = arena.allocateFrom(testString, charset, 0, testString.length());
+                            long length = text.byteSize();
+                            assertEquals(length, testString.getBytes(charset).length);
+                            String roundTrip = text.getString(0, charset, length);
+                            if (charset.newEncoder().canEncode(testString)) {
+                                assertEquals(roundTrip, testString);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    @Test(dataProvider = "strings")
+    public void testStringsCopy(String testString) {
+        if (!testString.isEmpty()) {
+            for (Charset charset : Charset.availableCharsets().values()) {
+                if (charset.canEncode()) {
+                    for (Arena arena : arenas()) {
+                        try (arena) {
+                            byte[] bytes = testString.getBytes(charset);
+                            MemorySegment text = arena.allocate(JAVA_BYTE, bytes.length);
+                            MemorySegment.copy(testString, charset, 0, text, 0, testString.length());
+                            String roundTrip = text.getString(0, charset, bytes.length);
+                            if (charset.newEncoder().canEncode(testString)) {
+                                assertEquals(roundTrip, testString);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    @Test
+    public void testStringsLengthNegative() {
+        try (Arena arena = Arena.ofConfined()) {
+            var segment = arena.allocateFrom("abc");
+            assertThrows(IllegalArgumentException.class, () -> segment.getString(1, StandardCharsets.UTF_8, -1));
+        }
+    }
+
+    @Test
+    public void testCopyThrows() {
+        try (Arena arena = Arena.ofConfined()) {
+            String testString = "abc";
+            String testString_notBytesCompatible = "snowman \u26C4";
+            MemorySegment text = arena.allocate(JAVA_BYTE, 3);
+            MemorySegment text_notBytesCompatible = arena.allocate(JAVA_BYTE,
+                    testString_notBytesCompatible.getBytes(StandardCharsets.UTF_8).length);
+            MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, 0, testString.length());
+            MemorySegment.copy(testString_notBytesCompatible, StandardCharsets.UTF_8, 0,
+                    text_notBytesCompatible, 0,
+                    testString_notBytesCompatible.length());
+            // srcIndex < 0
+            assertThrows(IndexOutOfBoundsException.class, () ->
+                    MemorySegment.copy(testString, StandardCharsets.UTF_8, -1, text, 0, testString.length()));
+            // dstOffset < 0
+            assertThrows(IndexOutOfBoundsException.class, () ->
+                    MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, -1, testString.length()));
+            // numChars < 0
+            assertThrows(IndexOutOfBoundsException.class, () ->
+                    MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, 0, -1));
+            // srcIndex + numChars > length
+            assertThrows(IndexOutOfBoundsException.class, () ->
+                    MemorySegment.copy(testString, StandardCharsets.UTF_8, 1, text, 0, testString.length()));
+            assertThrows(IndexOutOfBoundsException.class, () ->
+                    MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, 0, testString.length() + 1));
+            // dstOffset > byteSize() - B
+            assertThrows(IndexOutOfBoundsException.class, () ->
+                    MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, 1, testString.length()));
+            // srcIndex + numChars overflows int (both arguments are individually valid, non-negative values)
+            assertThrows(IndexOutOfBoundsException.class, () ->
+                    MemorySegment.copy(testString, StandardCharsets.UTF_8, 3, text, 0, Integer.MAX_VALUE));
+            assertThrows(IndexOutOfBoundsException.class, () ->
+                    MemorySegment.copy(testString_notBytesCompatible, StandardCharsets.UTF_8, 3, text, 0, Integer.MAX_VALUE));
+        }
+    }
+
+    @Test
+    public void testAllocateFromThrows() {
+        try (Arena arena = Arena.ofConfined()) {
+            String testString = "abc";
+            String testString_notBytesCompatible = "snowman \u26C4";
+            arena.allocateFrom(testString, StandardCharsets.UTF_8, 0, testString.length());
+            arena.allocateFrom(testString, StandardCharsets.UTF_8, 2, 1);
+            // srcIndex < 0
+            assertThrows(IndexOutOfBoundsException.class, () ->
+                    arena.allocateFrom(testString, StandardCharsets.UTF_8, -1, testString.length()));
+            // numChars < 0
+            assertThrows(IndexOutOfBoundsException.class, () ->
+                    arena.allocateFrom(testString, StandardCharsets.UTF_8, 0, -1));
+            // srcIndex + numChars > length
+            assertThrows(IndexOutOfBoundsException.class, () ->
+                    arena.allocateFrom(testString, StandardCharsets.UTF_8, 0, testString.length() + 1));
+            assertThrows(IndexOutOfBoundsException.class, () ->
+                    arena.allocateFrom(testString, StandardCharsets.UTF_8, 1, testString.length()));
+            // srcIndex + numChars overflows
+            assertThrows(IndexOutOfBoundsException.class, () ->
+                    arena.allocateFrom(testString, StandardCharsets.UTF_8, 3, Integer.MAX_VALUE));
+            assertThrows(IndexOutOfBoundsException.class, () -> arena.allocateFrom(
+                    testString_notBytesCompatible, StandardCharsets.UTF_8, 3, Integer.MAX_VALUE));
+        }
+    }
+
+    @Test
+    public void testGetStringThrows() {
+        try (Arena arena = Arena.ofConfined()) {
+            String testString = "abc";
+            MemorySegment text = arena.allocateFrom(testString, StandardCharsets.UTF_8, 0, testString.length());
+            text.getString(0, StandardCharsets.UTF_8, 3);
+            // unsupported string size
+            assertThrows(IllegalArgumentException.class, () ->
+                    text.getString(0, StandardCharsets.UTF_8, Integer.MAX_VALUE + 1L));
+            // offset < 0
+            assertThrows(IndexOutOfBoundsException.class, () ->
+                    text.getString(-1, StandardCharsets.UTF_8, 3));
+            // offset > byteSize() - length
+            assertThrows(IndexOutOfBoundsException.class, () ->
+                    text.getString(1, StandardCharsets.UTF_8, 3));
+            // length < 0
+            assertThrows(IllegalArgumentException.class, () ->
+                    text.getString(0, StandardCharsets.UTF_8, -1));
+        }
+    }
+
    @Test(dataProvider = "strings")
    public void testStringsHeap(String testString) {
        for (Charset charset : singleByteCharsets()) {
@ -221,6 +356,74 @@ public class TestStringEncoding {
        }
    }

+    @Test(dataProvider = "strings")
+    public void testSubstringGetString(String testString) {
+        if (testString.length() < 3 || !containsOnlyRegularCharacters(testString)) {
+            return;
+        }
+        for (var charset : singleByteCharsets()) {
+            for (var arena: arenas()) {
+                try (arena) {
+                    MemorySegment text = arena.allocateFrom(testString, charset, 0, testString.length());
+                    for (int srcIndex = 0; srcIndex <= testString.length(); srcIndex++) {
+                        for (int numChars = 0; numChars <= testString.length() - srcIndex; numChars++) {
+                            // this test assumes single-byte charsets
+                            String roundTrip = text.getString(srcIndex, charset, numChars);
+                            String substring = testString.substring(srcIndex, srcIndex + numChars);
+                            assertEquals(roundTrip, substring);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    @Test(dataProvider = "strings")
+    public void testSubstringAllocate(String testString) {
+        if (testString.length() < 3 || !containsOnlyRegularCharacters(testString)) {
+            return;
+        }
+        for (var charset : singleByteCharsets()) {
+            for (var arena: arenas()) {
+                try (arena) {
+                    for (int srcIndex = 0; srcIndex <= testString.length(); srcIndex++) {
+                        for (int numChars = 0; numChars <= testString.length() - srcIndex; numChars++) {
+                            MemorySegment text = arena.allocateFrom(testString, charset, srcIndex, numChars);
+                            String substring = testString.substring(srcIndex, srcIndex + numChars);
+                            assertEquals(text.byteSize(), substring.getBytes(charset).length);
+                            String roundTrip = text.getString(0, charset, text.byteSize());
+                            assertEquals(roundTrip, substring);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    @Test(dataProvider = "strings")
+    public void testSubstringCopy(String testString) {
+        if (testString.length() < 3 || !containsOnlyRegularCharacters(testString)) {
+            return;
+        }
+        for (var charset : singleByteCharsets()) {
+            for (var arena: arenas()) {
+                try (arena) {
+                    for (int srcIndex = 0; srcIndex <= testString.length(); srcIndex++) {
+                        for (int numChars = 0; numChars <= testString.length() - srcIndex; numChars++) {
+                            String substring = testString.substring(srcIndex, srcIndex + numChars);
+                            long length = substring.getBytes(charset).length;
+                            MemorySegment text = arena.allocate(JAVA_BYTE, length);
+                            long copied = MemorySegment.copy(testString, charset, srcIndex, text, 0, numChars);
+                            String roundTrip = text.getString(0, charset, length);
+                            assertEquals(roundTrip, substring);
+                            assertEquals(copied, length);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
    private static final MemoryLayout CHAR_POINTER = ADDRESS
            .withTargetLayout(MemoryLayout.sequenceLayout(Long.MAX_VALUE, JAVA_BYTE));
    private static final Linker LINKER = Linker.nativeLinker();
@ -402,7 +605,7 @@ public class TestStringEncoding {
                {""},
                {"X"},
                {"12345"},
-                {"yen \u00A5"},
+                {"section \u00A7"},
                {"snowman \u26C4"},
                {"rainbow \uD83C\uDF08"},
                {"0"},
--- a/test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java
+++ b/test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java
@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.openjdk.bench.java.lang.foreign;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+
+import java.lang.foreign.Arena;
+import java.lang.foreign.MemorySegment;
+import java.util.concurrent.TimeUnit;
+
+/** JMH benchmarks comparing three ways of writing a Java {@code String} into a {@code MemorySegment} as UTF-8. */
+@BenchmarkMode(Mode.AverageTime)
+@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
+@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
+@State(Scope.Benchmark)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@Fork(value = 3)
+public class FromJavaStringTest {
+    private String str;               // source text: the first `size` chars of LOREM
+    private MemorySegment strSegment; // allocated from str in setup(); every benchmark writes into it at offset 0
+
+    @Param({"5", "20", "100", "200", "451"})
+    int size;
+
+    /** Doubles LOREM until it holds at least {@code size} chars, then builds the source string and its segment. */
+    @Setup
+    public void setup() {
+        while (LOREM.length() < size) {
+            LOREM += LOREM;
+        }
+        str = LOREM.substring(0, size);
+        strSegment = Arena.ofAuto().allocateFrom(str);
+    }
+
+    /** Writes {@code str} into the segment with {@code MemorySegment.setString}. */
+    @Benchmark
+    public void segment_setString() {
+        strSegment.setString(0, str, UTF_8);
+    }
+
+    /** Copies all chars of {@code str} with the String-source {@code MemorySegment.copy} overload. */
+    @Benchmark
+    public void segment_copyStringRaw() {
+        MemorySegment.copy(str, UTF_8, 0, strSegment, 0, str.length());
+    }
+
+    /** Encodes {@code str} to an intermediate {@code byte[]}, then copies that array into the segment. */
+    @Benchmark
+    public void segment_copyStringBytes() {
+        byte[] bytes = str.getBytes(UTF_8);
+        MemorySegment.copy(bytes, 0, strSegment, JAVA_BYTE, 0, bytes.length);
+    }
+
+    static String LOREM = """
+            Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et
+             dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip
+             ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
+             fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt
+             mollit anim id est laborum.
+            """;
+}
--- a/test/micro/org/openjdk/bench/java/lang/foreign/ToJavaStringTest.java
+++ b/test/micro/org/openjdk/bench/java/lang/foreign/ToJavaStringTest.java
@ -22,6 +22,9 @@
 */
 package org.openjdk.bench.java.lang.foreign;

+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
 import org.openjdk.jmh.annotations.Benchmark;
 import org.openjdk.jmh.annotations.BenchmarkMode;
 import org.openjdk.jmh.annotations.Fork;
@ -47,6 +50,7 @@ import java.util.concurrent.TimeUnit;
 public class ToJavaStringTest {

    private MemorySegment strSegment;
+    private int length;

    @Param({"5", "20", "100", "200", "451"})
    int size;
@ -61,19 +65,33 @@ public class ToJavaStringTest {
        while (LOREM.length() < size) {
            LOREM += LOREM;
        }
-        strSegment = arena.allocateFrom(LOREM.substring(0, size));
+        var s = LOREM.substring(0, size);
+        strSegment = arena.allocateFrom(s);
+        length = s.getBytes(UTF_8).length;
    }

    @Benchmark
-    public String panama_readString() {
+    public String segment_getString() {
        return strSegment.getString(0);
    }

+    @Benchmark
+    public String segment_getStringLength() { // decodes through the getString overload that takes an explicit length
+        return strSegment.getString(0, UTF_8, length); // length was assigned from s.getBytes(UTF_8).length earlier in this class
+    }
+
    @Benchmark
    public String jni_readString() {
        return readString(strSegment.address());
    }

+    @Benchmark
+    public String segment_copyStringBytes() { // copies the segment bytes into a byte[] and decodes that array
+        byte[] bytes = new byte[length]; // length was assigned from s.getBytes(UTF_8).length earlier in this class
+        MemorySegment.copy(strSegment, JAVA_BYTE, 0, bytes, 0, length); // segment offset 0 -> array index 0, `length` elements
+        return new String(bytes, UTF_8); // decode from the copied array
+    }
+
    static native String readString(long addr);

    static String LOREM = """