8164612: NoSuchMethodException when method name contains NULL or Latin-1 supplement character

String length needs to be updated when converting from unicode to utf8. Reviewed-by: kvn, coleenp
2026-07-01 14:50:39 +00:00 · 2016-10-26 14:36:05 +02:00 · 2016-10-26 14:36:05 +02:00 · ed9adcbdfa
commit ed9adcbdfa
parent 82bdf29022
5 changed files with 81 additions and 50 deletions
--- a/hotspot/src/share/vm/classfile/stringTable.cpp
+++ b/hotspot/src/share/vm/classfile/stringTable.cpp
@ -437,17 +437,15 @@ void StringTable::dump(outputStream* st, bool verbose) {
          st->print("%d: ", length);
        } else {
          ResourceMark rm(THREAD);
-          int utf8_length;
+          int utf8_length = length;
          char* utf8_string;

          if (!is_latin1) {
            jchar* chars = value->char_at_addr(0);
-            utf8_length = UNICODE::utf8_length(chars, length);
-            utf8_string = UNICODE::as_utf8(chars, length);
+            utf8_string = UNICODE::as_utf8(chars, utf8_length);
          } else {
            jbyte* bytes = value->byte_at_addr(0);
-            utf8_length = UNICODE::utf8_length(bytes, length);
-            utf8_string = UNICODE::as_utf8(bytes, length);
+            utf8_string = UNICODE::as_utf8(bytes, utf8_length);
          }

          st->print("%d: ", utf8_length);
--- a/hotspot/src/share/vm/prims/jvmtiEnv.cpp
+++ b/hotspot/src/share/vm/prims/jvmtiEnv.cpp
@ -1001,7 +1001,8 @@ JvmtiEnv::GetThreadInfo(jthread thread, jvmtiThreadInfo* info_ptr) {
    if (name() != NULL) {
      n = java_lang_String::as_utf8_string(name());
    } else {
-      n = UNICODE::as_utf8((jchar*) NULL, 0);
+      int utf8_length = 0;
+      n = UNICODE::as_utf8((jchar*) NULL, utf8_length);
    }

    info_ptr->name = (char *) jvmtiMalloc(strlen(n)+1);
--- a/hotspot/src/share/vm/utilities/utf8.cpp
+++ b/hotspot/src/share/vm/utilities/utf8.cpp
@ -411,61 +411,46 @@ bool UNICODE::is_latin1(jchar* base, int length) {
 }

 int UNICODE::utf8_size(jchar c) {
-  if ((0x0001 <= c) && (c <= 0x007F)) return 1;
-  if (c <= 0x07FF) return 2;
-  return 3;
+  if ((0x0001 <= c) && (c <= 0x007F)) {
+    // ASCII character
+    return 1;
+  } else  if (c <= 0x07FF) {
+    return 2;
+  } else {
+    return 3;
+  }
 }

 int UNICODE::utf8_size(jbyte c) {
-  if (c >= 0x0001) return 1;
-  return 2;
-}
-
-int UNICODE::utf8_length(jchar* base, int length) {
-  int result = 0;
-  for (int index = 0; index < length; index++) {
-    jchar c = base[index];
-    if ((0x0001 <= c) && (c <= 0x007F)) result += 1;
-    else if (c <= 0x07FF) result += 2;
-    else result += 3;
+  if (c >= 0x01) {
+    // ASCII character. Check is equivalent to
+    // (0x01 <= c) && (c <= 0x7F) because c is signed.
+    return 1;
+  } else {
+    // Non-ASCII character or 0x00 which needs to be
+    // two-byte encoded as 0xC080 in modified UTF-8.
+    return 2;
  }
-  return result;
 }

-int UNICODE::utf8_length(jbyte* base, int length) {
+template<typename T>
+int UNICODE::utf8_length(T* base, int length) {
  int result = 0;
  for (int index = 0; index < length; index++) {
-    jbyte c = base[index];
+    T c = base[index];
    result += utf8_size(c);
  }
  return result;
 }

-char* UNICODE::as_utf8(jchar* base, int length) {
+template<typename T>
+char* UNICODE::as_utf8(T* base, int& length) {
  int utf8_len = utf8_length(base, length);
  u_char* buf = NEW_RESOURCE_ARRAY(u_char, utf8_len + 1);
  char* result = as_utf8(base, length, (char*) buf, utf8_len + 1);
  assert((int) strlen(result) == utf8_len, "length prediction must be correct");
-  return result;
-}
-
-char* UNICODE::as_utf8(jbyte* base, int length) {
-  int utf8_len = utf8_length(base, length);
-  u_char* result = NEW_RESOURCE_ARRAY(u_char, utf8_len + 1);
-  u_char* p = result;
-  if (utf8_len == length) {
-    for (int index = 0; index < length; index++) {
-      *p++ = base[index];
-    }
-  } else {
-    // Unicode string contains U+0000 which should
-    // be encoded as 0xC080 in "modified" UTF8.
-    for (int index = 0; index < length; index++) {
-      p = utf8_write(p, ((jchar) base[index]) & 0xff);
-    }
-  }
-  *p = '\0';
-  assert(p == &result[utf8_len], "length prediction must be correct");
+  // Set string length to utf8 length
+  length = utf8_len;
  return (char*) result;
 }

@ -490,9 +475,10 @@ char* UNICODE::as_utf8(jbyte* base, int length, char* buf, int buflen) {
    buflen -= sz;
    if (buflen <= 0) break; // string is truncated
    if (sz == 1) {
+      // Copy ASCII characters (UTF-8 is ASCII compatible)
      *p++ = c;
    } else {
-      // Unicode string contains U+0000 which should
+      // Non-ASCII character or 0x00 which should
      // be encoded as 0xC080 in "modified" UTF8.
      p = utf8_write(p, ((jchar) c) & 0xff);
    }
@ -543,6 +529,10 @@ void UNICODE::as_quoted_ascii(const T* base, int length, char* buf, int buflen)
 }

 // Explicit instantiation for all supported types.
+template int UNICODE::utf8_length(jbyte* base, int length);
+template int UNICODE::utf8_length(jchar* base, int length);
+template char* UNICODE::as_utf8(jbyte* base, int& length);
+template char* UNICODE::as_utf8(jchar* base, int& length);
 template int UNICODE::quoted_ascii_length<jbyte>(jbyte* base, int length);
 template int UNICODE::quoted_ascii_length<jchar>(jchar* base, int length);
 template void UNICODE::as_quoted_ascii<jbyte>(const jbyte* base, int length, char* buf, int buflen);
--- a/hotspot/src/share/vm/utilities/utf8.hpp
+++ b/hotspot/src/share/vm/utilities/utf8.hpp
@ -97,16 +97,15 @@ class UNICODE : AllStatic {
  static int utf8_size(jbyte c);

  // returns the utf8 length of a unicode string
-  static int utf8_length(jchar* base, int length);
-  static int utf8_length(jbyte* base, int length);
+  template<typename T> static int utf8_length(T* base, int length);

  // converts a unicode string to utf8 string
  static void convert_to_utf8(const jchar* base, int length, char* utf8_buffer);

  // converts a unicode string to a utf8 string; result is allocated
-  // in resource area unless a buffer is provided.
-  static char* as_utf8(jchar* base, int length);
-  static char* as_utf8(jbyte* base, int length);
+  // in resource area unless a buffer is provided. The unicode 'length'
+  // parameter is set to the length of the result utf8 string.
+  template<typename T> static char* as_utf8(T* base, int& length);
  static char* as_utf8(jchar* base, int length, char* buf, int buflen);
  static char* as_utf8(jbyte* base, int length, char* buf, int buflen);

--- a/hotspot/test/runtime/CompactStrings/TestMethodNames.java
+++ b/hotspot/test/runtime/CompactStrings/TestMethodNames.java
@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import javax.script.*;
+import java.util.function.*;
+
+/*
+ * @test
+ * @bug 8164612
+ * @summary Test method names containing Latin-1 supplement characters.
+ * @run main/othervm -XX:+CompactStrings TestMethodNames
+ * @run main/othervm -XX:-CompactStrings TestMethodNames
+ */
+public class TestMethodNames {
+    public static void main(String[] args) throws Exception {
+        ScriptEngineManager m = new ScriptEngineManager();
+        ScriptEngine e = m.getEngineByName("nashorn");
+
+        e.eval("({get \"\0\"(){}})[\"\0\"]");
+        e.eval("({get \"\\x80\"(){}})[\"\\x80\"]");
+        e.eval("({get \"\\xff\"(){}})[\"\\xff\"]");
+    }
+}