8366421: ModifiedUtf.utfLen may overflow for giant string

Reviewed-by: liach, rriggs
This commit is contained in:
Guanqiang Han 2025-09-24 14:10:19 +00:00 committed by Chen Liang
parent 156eb767f1
commit 735afd93bb
5 changed files with 171 additions and 19 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1994, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1994, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -26,6 +26,8 @@
package java.io;
import java.lang.runtime.ExactConversionsSupport;
import jdk.internal.access.JavaLangAccess;
import jdk.internal.access.SharedSecrets;
import jdk.internal.util.ByteArray;
@ -364,11 +366,12 @@ public class DataOutputStream extends FilterOutputStream implements DataOutput {
static int writeUTF(String str, DataOutput out) throws IOException {
final int strlen = str.length();
int countNonZeroAscii = JLA.countNonZeroAscii(str);
int utflen = utfLen(str, countNonZeroAscii);
long utflenLong = utfLen(str, countNonZeroAscii);
if (utflen > 65535 || /* overflow */ utflen < strlen)
throw new UTFDataFormatException(tooLongMsg(str, utflen));
if (!ExactConversionsSupport.isLongToCharExact(utflenLong))
throw new UTFDataFormatException(tooLongMsg(str, utflenLong));
int utflen = (int)utflenLong;
final byte[] bytearr;
if (out instanceof DataOutputStream dos) {
if (dos.bytearr == null || (dos.bytearr.length < (utflen + 2)))
@ -391,14 +394,12 @@ public class DataOutputStream extends FilterOutputStream implements DataOutput {
return utflen + 2;
}
private static String tooLongMsg(String s, int bits32) {
private static String tooLongMsg(String s, long utflen) {
int slen = s.length();
String head = s.substring(0, 8);
String tail = s.substring(slen - 8, slen);
// handle int overflow with max 3x expansion
long actualLength = (long)slen + Integer.toUnsignedLong(bits32 - slen);
return "encoded string (" + head + "..." + tail + ") too long: "
+ actualLength + " bytes";
+ utflen + " bytes";
}
/**

View File

@ -26,6 +26,8 @@
package java.io;
import java.lang.runtime.ExactConversionsSupport;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@ -1899,12 +1901,12 @@ public class ObjectOutputStream
private void writeUTFInternal(String str, boolean writeHeader) throws IOException {
int strlen = str.length();
int countNonZeroAscii = JLA.countNonZeroAscii(str);
int utflen = utfLen(str, countNonZeroAscii);
if (utflen <= 0xFFFF) {
long utflen = utfLen(str, countNonZeroAscii);
if (ExactConversionsSupport.isLongToCharExact(utflen)) {
if(writeHeader) {
writeByte(TC_STRING);
}
writeShort(utflen);
writeShort((short)utflen);
} else {
if(writeHeader) {
writeByte(TC_LONGSTRING);

View File

@ -30,6 +30,7 @@ import java.lang.classfile.constantpool.ClassEntry;
import java.lang.classfile.constantpool.ConstantPool;
import java.lang.classfile.constantpool.ConstantPoolBuilder;
import java.lang.classfile.constantpool.PoolEntry;
import java.lang.runtime.ExactConversionsSupport;
import java.util.Arrays;
import jdk.internal.access.JavaLangAccess;
@ -275,8 +276,11 @@ public final class BufWriterImpl implements BufWriter {
void writeUtfEntry(String str) {
int strlen = str.length();
int countNonZeroAscii = JLA.countNonZeroAscii(str);
int utflen = utfLen(str, countNonZeroAscii);
Util.checkU2(utflen, "utf8 length");
long utflenLong = utfLen(str, countNonZeroAscii);
if (!ExactConversionsSupport.isLongToCharExact(utflenLong)) {
throw new IllegalArgumentException("utf8 length out of range of u2: " + utflenLong);
}
int utflen = (int)utflenLong;
reserveSpace(utflen + 3);
int offset = this.offset;

View File

@ -1,4 +1,5 @@
/*
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -63,12 +64,12 @@ public abstract class ModifiedUtf {
* @param countNonZeroAscii the number of non-zero ascii characters in the prefix calculated by JLA.countNonZeroAscii(str)
*/
@ForceInline
public static int utfLen(String str, int countNonZeroAscii) {
int utflen = str.length();
for (int i = utflen - 1; i >= countNonZeroAscii; i--) {
public static long utfLen(String str, int countNonZeroAscii) {
long utflen = str.length();
for (int i = (int)utflen - 1; i >= countNonZeroAscii; i--) {
int c = str.charAt(i);
if (c >= 0x80 || c == 0)
utflen += (c >= 0x800) ? 2 : 1;
utflen += (c >= 0x800) ? 2L : 1L;
}
return utflen;
}
@ -90,8 +91,7 @@ public abstract class ModifiedUtf {
return false;
}
// Check exact Modified UTF-8 length.
// The check strLen > CONSTANT_POOL_UTF8_MAX_BYTES above ensures that utfLen can't overflow here.
int utfLen = utfLen(str, 0);
long utfLen = utfLen(str, 0);
return utfLen <= CONSTANT_POOL_UTF8_MAX_BYTES;
}
}

View File

@ -0,0 +1,145 @@
/*
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @bug 8366421
* @summary Test for ModifiedUtf.utfLen() return type change from int to long to avoid overflow
* @modules java.base/jdk.internal.classfile.impl:+open
* java.base/jdk.internal.util
* @run main/othervm -Xmx4g ModifiedUtfTest
*/
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.io.UTFDataFormatException;
import java.lang.classfile.constantpool.ConstantPoolBuilder;
import java.lang.classfile.ClassFile;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import jdk.internal.classfile.impl.BufWriterImpl;
import jdk.internal.classfile.impl.ClassFileImpl;
import jdk.internal.util.ModifiedUtf;
/**
 * Regression test for the {@code ModifiedUtf.utfLen()} return type change
 * from {@code int} to {@code long}.
 *
 * <p>The test builds a string whose computed Modified UTF-8 length exceeds
 * {@code Integer.MAX_VALUE} and checks that {@code utfLen} and its three
 * callers report or reject that length instead of silently wrapping around.
 */
public class ModifiedUtfTest {

    /**
     * Thrown by {@link HeaderCaptureOutputStream} once its fixed-size buffer
     * is full, to abort the write in progress.
     */
    static class HeaderCapturedException extends RuntimeException {
    }

    /**
     * Keep only a fixed-length output and stop writing further data
     * by throwing an exception when the limit is exceeded.
     * For testing purposes only.
     */
    static class HeaderCaptureOutputStream extends OutputStream {
        // Backing store for the captured prefix; its length is the capture limit.
        private final byte[] head;
        // Number of bytes captured so far.
        private int count;

        /**
         * @param headSize number of leading bytes to retain
         */
        public HeaderCaptureOutputStream(int headSize) {
            this.head = new byte[headSize];
        }

        /**
         * Stores {@code b} while there is room left in the buffer.
         *
         * @throws HeaderCapturedException if the buffer is already full
         */
        @Override
        public void write(int b) {
            if (count >= head.length) {
                // Only reserve a fixed-length header and throw an exception to stop writing.
                throw new HeaderCapturedException();
            }
            head[count++] = (byte) b;
        }

        /**
         * @return the backing array itself (not a copy); positions that were
         *         never written remain zero
         */
        public byte[] get() {
            return head;
        }
    }

    private static final String THREE_BYTE = "\u2600"; // 3-byte UTF-8

    /**
     * Runs all checks in sequence and prints {@code PASSED} on success.
     *
     * @param args ignored
     * @throws Exception if a check fails or an unexpected error occurs
     */
    public static void main(String[] args) throws Exception {
        // Smallest repeat count for which 3 * count no longer fits in an int.
        int count = Integer.MAX_VALUE / 3 + 1;
        long expected = 3L * count;
        String largeString = THREE_BYTE.repeat(count);

        long total = ModifiedUtf.utfLen(largeString, 0);
        if (total != expected) {
            throw new RuntimeException("Expected total=" + expected + " but got " + total);
        }

        /*
         * Verifies that the following three methods that call ModifiedUtf.utfLen()
         * correctly handle overflow:
         * - DataOutputStream.writeUTF(String)
         * - BufWriterImpl.writeUtfEntry(String)
         * - ObjectOutputStream.writeUTF(String)
         */
        try (ByteArrayOutputStream byteOut = new ByteArrayOutputStream();
             DataOutputStream dataOut = new DataOutputStream(byteOut)) {
            dataOut.writeUTF(largeString);
            throw new RuntimeException("Expected UTFDataFormatException was not thrown.");
        } catch (UTFDataFormatException expectedException) {
            // Expected: the encoded length does not fit the 2-byte length prefix.
        }

        BufWriterImpl bufWriter = new BufWriterImpl(ConstantPoolBuilder.of(), (ClassFileImpl) ClassFile.of());
        Method writeUtfEntry = bufWriter.getClass().getDeclaredMethod("writeUtfEntry", String.class);
        writeUtfEntry.setAccessible(true);
        try {
            writeUtfEntry.invoke(bufWriter, largeString);
            throw new RuntimeException("Expected IllegalArgumentException was not thrown.");
        } catch (InvocationTargetException e) {
            Throwable cause = e.getCause();
            if (!(cause instanceof IllegalArgumentException)) {
                // Chain the real failure so an unexpected exception type can be diagnosed.
                throw new RuntimeException("Expected IllegalArgumentException was not thrown.", cause);
            }
        }

        /*
         * In the writeUTF function, utfLen is used to calculate the length of the string to be written
         * and store it in the stream header. This test uses the HeaderCaptureOutputStream inner class
         * to capture the header bytes and compare them with the expected length,
         * verifying that utfLen returns the correct value.
         */
        int lengthFieldSize = 8;
        // Offset to UTF length field: 2 bytes STREAM_MAGIC + 2 bytes STREAM_VERSION + 5 bytes block data header
        int lengthFieldOffset = 9;
        int headerSize = 20; // greater than lengthFieldSize + lengthFieldOffset
        HeaderCaptureOutputStream headerOut = new HeaderCaptureOutputStream(headerSize);
        try (ObjectOutputStream objOut = new ObjectOutputStream(headerOut)) {
            objOut.writeUTF(largeString);
        } catch (HeaderCapturedException expectedException) {
            // Expected: the capture stream aborts the write once the header prefix is full.
        }

        byte[] header = headerOut.get();
        ByteBuffer bf = ByteBuffer.wrap(header, lengthFieldOffset, lengthFieldSize);
        bf.order(ByteOrder.BIG_ENDIAN);
        long lenInHeader = bf.getLong();
        if (lenInHeader != expected) {
            throw new RuntimeException("Header length mismatch: expected=" + expected + ", found=" + lenInHeader);
        }
        System.out.println("PASSED");
    }
}