From 735afd93bbdd63d53dc4cec0ac970026ac95cc64 Mon Sep 17 00:00:00 2001 From: Guanqiang Han Date: Wed, 24 Sep 2025 14:10:19 +0000 Subject: [PATCH] 8366421: ModifiedUtf.utfLen may overflow for giant string Reviewed-by: liach, rriggs --- .../classes/java/io/DataOutputStream.java | 17 +- .../classes/java/io/ObjectOutputStream.java | 8 +- .../classfile/impl/BufWriterImpl.java | 8 +- .../jdk/internal/util/ModifiedUtf.java | 12 +- .../jdk/internal/util/ModifiedUtfTest.java | 145 ++++++++++++++++++ 5 files changed, 171 insertions(+), 19 deletions(-) create mode 100644 test/jdk/jdk/internal/util/ModifiedUtfTest.java diff --git a/src/java.base/share/classes/java/io/DataOutputStream.java b/src/java.base/share/classes/java/io/DataOutputStream.java index 4b22d65bd39..2a0a7526591 100644 --- a/src/java.base/share/classes/java/io/DataOutputStream.java +++ b/src/java.base/share/classes/java/io/DataOutputStream.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 1994, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1994, 2025, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -26,6 +26,8 @@ package java.io; +import java.lang.runtime.ExactConversionsSupport; + import jdk.internal.access.JavaLangAccess; import jdk.internal.access.SharedSecrets; import jdk.internal.util.ByteArray; @@ -364,11 +366,12 @@ public class DataOutputStream extends FilterOutputStream implements DataOutput { static int writeUTF(String str, DataOutput out) throws IOException { final int strlen = str.length(); int countNonZeroAscii = JLA.countNonZeroAscii(str); - int utflen = utfLen(str, countNonZeroAscii); + long utflenLong = utfLen(str, countNonZeroAscii); - if (utflen > 65535 || /* overflow */ utflen < strlen) - throw new UTFDataFormatException(tooLongMsg(str, utflen)); + if (!ExactConversionsSupport.isLongToCharExact(utflenLong)) + throw new UTFDataFormatException(tooLongMsg(str, utflenLong)); + int utflen = (int)utflenLong; final byte[] bytearr; if (out instanceof DataOutputStream dos) { if (dos.bytearr == null || (dos.bytearr.length < (utflen + 2))) @@ -391,14 +394,12 @@ public class DataOutputStream extends FilterOutputStream implements DataOutput { return utflen + 2; } - private static String tooLongMsg(String s, int bits32) { + private static String tooLongMsg(String s, long utflen) { int slen = s.length(); String head = s.substring(0, 8); String tail = s.substring(slen - 8, slen); - // handle int overflow with max 3x expansion - long actualLength = (long)slen + Integer.toUnsignedLong(bits32 - slen); return "encoded string (" + head + "..." + tail + ") too long: " - + actualLength + " bytes"; + + utflen + " bytes"; } /** diff --git a/src/java.base/share/classes/java/io/ObjectOutputStream.java b/src/java.base/share/classes/java/io/ObjectOutputStream.java index 31413bcf8ed..40777ca1587 100644 --- a/src/java.base/share/classes/java/io/ObjectOutputStream.java +++ b/src/java.base/share/classes/java/io/ObjectOutputStream.java @@ -26,6 +26,8 @@ package java.io; +import java.lang.runtime.ExactConversionsSupport; + import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -1899,12 +1901,12 @@ public class ObjectOutputStream private void writeUTFInternal(String str, boolean writeHeader) throws IOException { int strlen = str.length(); int countNonZeroAscii = JLA.countNonZeroAscii(str); - int utflen = utfLen(str, countNonZeroAscii); - if (utflen <= 0xFFFF) { + long utflen = utfLen(str, countNonZeroAscii); + if (ExactConversionsSupport.isLongToCharExact(utflen)) { if(writeHeader) { writeByte(TC_STRING); } - writeShort(utflen); + writeShort((short)utflen); } else { if(writeHeader) { writeByte(TC_LONGSTRING); diff --git a/src/java.base/share/classes/jdk/internal/classfile/impl/BufWriterImpl.java b/src/java.base/share/classes/jdk/internal/classfile/impl/BufWriterImpl.java index dda9accd8b9..b30592a4ebd 100644 --- a/src/java.base/share/classes/jdk/internal/classfile/impl/BufWriterImpl.java +++ b/src/java.base/share/classes/jdk/internal/classfile/impl/BufWriterImpl.java @@ -30,6 +30,7 @@ import java.lang.classfile.constantpool.ClassEntry; import java.lang.classfile.constantpool.ConstantPool; import java.lang.classfile.constantpool.ConstantPoolBuilder; import java.lang.classfile.constantpool.PoolEntry; +import java.lang.runtime.ExactConversionsSupport; import java.util.Arrays; import jdk.internal.access.JavaLangAccess; @@ -275,8 +276,11 @@ public final class BufWriterImpl implements BufWriter { void writeUtfEntry(String str) { int strlen = str.length(); int countNonZeroAscii = JLA.countNonZeroAscii(str); - int utflen = utfLen(str, countNonZeroAscii); - Util.checkU2(utflen, "utf8 length"); + long utflenLong = utfLen(str, countNonZeroAscii); + if (!ExactConversionsSupport.isLongToCharExact(utflenLong)) { + throw new IllegalArgumentException("utf8 length out of range of u2: " + utflenLong); + } + int utflen = (int)utflenLong; reserveSpace(utflen + 3); int offset = this.offset; diff --git a/src/java.base/share/classes/jdk/internal/util/ModifiedUtf.java b/src/java.base/share/classes/jdk/internal/util/ModifiedUtf.java index e8a4f27796f..46885e12adf 100644 --- a/src/java.base/share/classes/jdk/internal/util/ModifiedUtf.java +++ b/src/java.base/share/classes/jdk/internal/util/ModifiedUtf.java @@ -1,4 +1,5 @@ /* + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -63,12 +64,12 @@ public abstract class ModifiedUtf { * @param countNonZeroAscii the number of non-zero ascii characters in the prefix calculated by JLA.countNonZeroAscii(str) */ @ForceInline - public static int utfLen(String str, int countNonZeroAscii) { - int utflen = str.length(); - for (int i = utflen - 1; i >= countNonZeroAscii; i--) { + public static long utfLen(String str, int countNonZeroAscii) { + long utflen = str.length(); + for (int i = (int)utflen - 1; i >= countNonZeroAscii; i--) { int c = str.charAt(i); if (c >= 0x80 || c == 0) - utflen += (c >= 0x800) ? 2 : 1; + utflen += (c >= 0x800) ? 2L : 1L; } return utflen; } @@ -90,8 +91,7 @@ public abstract class ModifiedUtf { return false; } // Check exact Modified UTF-8 length. - // The check strLen > CONSTANT_POOL_UTF8_MAX_BYTES above ensures that utfLen can't overflow here. - int utfLen = utfLen(str, 0); + long utfLen = utfLen(str, 0); return utfLen <= CONSTANT_POOL_UTF8_MAX_BYTES; } } diff --git a/test/jdk/jdk/internal/util/ModifiedUtfTest.java b/test/jdk/jdk/internal/util/ModifiedUtfTest.java new file mode 100644 index 00000000000..7572acfba24 --- /dev/null +++ b/test/jdk/jdk/internal/util/ModifiedUtfTest.java @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8366421 + * @summary Test for ModifiedUtf.utfLen() return type change from int to long to avoid overflow + * @modules java.base/jdk.internal.classfile.impl:+open + * java.base/jdk.internal.util + * @run main/othervm -Xmx4g ModifiedUtfTest + */ + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.ObjectOutputStream; +import java.io.OutputStream; +import java.io.UTFDataFormatException; + +import java.lang.classfile.constantpool.ConstantPoolBuilder; +import java.lang.classfile.ClassFile; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +import jdk.internal.classfile.impl.BufWriterImpl; +import jdk.internal.classfile.impl.ClassFileImpl; +import jdk.internal.util.ModifiedUtf; + +public class ModifiedUtfTest { + + static class HeaderCapturedException extends RuntimeException { + } + /** + * Keep only a fixed-length output and stop writing further data + * by throwing an exception when the limit is exceeded. + * For testing purposes only. + */ + static class HeaderCaptureOutputStream extends OutputStream { + private byte[] head; + private int count; + + public HeaderCaptureOutputStream(int headSize) { + this.head = new byte[headSize]; + } + + @Override + public void write(int b) { + if (count >= head.length) { + // Only reserve a fixed-length header and throw an exception to stop writing. + throw new HeaderCapturedException(); + } + head[count++] = (byte) b; + } + public byte[] get(){ + return head; + } + } + + private static final String THREE_BYTE = "\u2600"; // 3-byte UTF-8 + + public static void main(String[] args) throws Exception{ + int count = Integer.MAX_VALUE / 3 + 1; + long expected = 3L * count; + String largeString = THREE_BYTE.repeat(count); + + long total = ModifiedUtf.utfLen(largeString, 0); + if (total != expected) { + throw new RuntimeException("Expected total=" + expected + " but got " + total); + } + + /** + * Verifies that the following three methods that call ModifiedUtf.utfLen() + * correctly handle overflow: + * - DataOutputStream.writeUTF(String) + * - BufWriterImpl.writeUtfEntry(String) + * - ObjectOutputStream.writeUTF(String) + */ + try (ByteArrayOutputStream byteOut = new ByteArrayOutputStream(); + DataOutputStream dataOut = new DataOutputStream(byteOut)) { + dataOut.writeUTF(largeString); + throw new RuntimeException("Expected UTFDataFormatException was not thrown."); + } catch (UTFDataFormatException e) { + } + + BufWriterImpl bufWriter = new BufWriterImpl(ConstantPoolBuilder.of(), (ClassFileImpl) ClassFile.of()); + Method writeUtfEntry = bufWriter.getClass().getDeclaredMethod("writeUtfEntry", String.class); + writeUtfEntry.setAccessible(true); + try { + writeUtfEntry.invoke(bufWriter, largeString); + throw new RuntimeException("Expected IllegalArgumentException was not thrown."); + } catch (InvocationTargetException e) { + Throwable cause = e.getCause(); + if (!(cause instanceof IllegalArgumentException)) { + throw new RuntimeException("Expected IllegalArgumentException was not thrown."); + } + } + + /** + * In the writeUTF function, utfLen is used to calculate the length of the string to be written + * and store it in the stream header. This test uses the HeaderCaptureOutputStream inner class + * to capture the header bytes and compare them with the expected length, + * verifying that utfLen returns the correct value. + */ + int lengthFieldSize = 8; + // Offset to UTF length field: 2 bytes STREAM_MAGIC + 2 bytes STREAM_VERSION + 5 bytes block data header + int lengthFieldOffset = 9; + int headerSize = 20; // greater than lengthFieldSize + lengthFieldOffset + HeaderCaptureOutputStream headerOut = new HeaderCaptureOutputStream(headerSize); + try (ObjectOutputStream objOut = new ObjectOutputStream(headerOut)) { + objOut.writeUTF(largeString); + } catch (HeaderCapturedException e) { + } + byte[] header = headerOut.get(); + ByteBuffer bf = ByteBuffer.wrap(header, lengthFieldOffset, lengthFieldSize); + bf.order(ByteOrder.BIG_ENDIAN); + long lenInHeader = bf.getLong(); + if ( lenInHeader != expected ) { + throw new RuntimeException("Header length mismatch: expected=" + expected + ", found=" + lenInHeader); + } + + System.out.println("PASSED"); + } +} \ No newline at end of file