8353786: Migrate Vector API math library support to FFM API

Reviewed-by: jbhateja, kvn, psandoz, xgong, jvernee, mli
2026-03-14 18:03:44 +00:00 · 2025-04-25 21:22:37 +00:00 · 2025-04-25 21:22:37 +00:00 · e57fd71049
commit e57fd71049
parent 5db62abb42
50 changed files with 920 additions and 475 deletions
--- a/src/hotspot/cpu/aarch64/aarch64.ad
+++ b/src/hotspot/cpu/aarch64/aarch64.ad
@@ -2306,11 +2306,11 @@ const RegMask* Matcher::predicate_reg_mask(void) {
 }

 bool Matcher::supports_vector_calling_convention(void) {
-  return EnableVectorSupport && UseVectorStubs;
+  return EnableVectorSupport;
 }

 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
-  assert(EnableVectorSupport && UseVectorStubs, "sanity");
+  assert(EnableVectorSupport, "sanity");
  int lo = V0_num;
  int hi = V0_H_num;
  if (ideal_reg == Op_VecX || ideal_reg == Op_VecA) {
--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
@@ -11166,79 +11166,6 @@ class StubGenerator: public StubCodeGenerator {
    // }
  };

-  void generate_vector_math_stubs() {
-    // Get native vector math stub routine addresses
-    void* libsleef = nullptr;
-    char ebuf[1024];
-    char dll_name[JVM_MAXPATHLEN];
-    if (os::dll_locate_lib(dll_name, sizeof(dll_name), Arguments::get_dll_dir(), "sleef")) {
-      libsleef = os::dll_load(dll_name, ebuf, sizeof ebuf);
-    }
-    if (libsleef == nullptr) {
-      log_info(library)("Failed to load native vector math library, %s!", ebuf);
-      return;
-    }
-    // Method naming convention
-    //   All the methods are named as <OP><T><N>_<U><suffix>
-    //   Where:
-    //     <OP>     is the operation name, e.g. sin
-    //     <T>      is optional to indicate float/double
-    //              "f/d" for vector float/double operation
-    //     <N>      is the number of elements in the vector
-    //              "2/4" for neon, and "x" for sve
-    //     <U>      is the precision level
-    //              "u10/u05" represents 1.0/0.5 ULP error bounds
-    //               We use "u10" for all operations by default
-    //               But for those functions do not have u10 support, we use "u05" instead
-    //     <suffix> indicates neon/sve
-    //              "sve/advsimd" for sve/neon implementations
-    //     e.g. sinfx_u10sve is the method for computing vector float sin using SVE instructions
-    //          cosd2_u10advsimd is the method for computing 2 elements vector double cos using NEON instructions
-    //
-    log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "sleef" JNI_LIB_SUFFIX, p2i(libsleef));
-
-    // Math vector stubs implemented with SVE for scalable vector size.
-    if (UseSVE > 0) {
-      for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) {
-        int vop = VectorSupport::VECTOR_OP_MATH_START + op;
-        // Skip "tanh" because there is performance regression
-        if (vop == VectorSupport::VECTOR_OP_TANH) {
-          continue;
-        }
-
-        // The native library does not support u10 level of "hypot".
-        const char* ulf = (vop == VectorSupport::VECTOR_OP_HYPOT) ? "u05" : "u10";
-
-        snprintf(ebuf, sizeof(ebuf), "%sfx_%ssve", VectorSupport::mathname[op], ulf);
-        StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libsleef, ebuf);
-
-        snprintf(ebuf, sizeof(ebuf), "%sdx_%ssve", VectorSupport::mathname[op], ulf);
-        StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libsleef, ebuf);
-      }
-    }
-
-    // Math vector stubs implemented with NEON for 64/128 bits vector size.
-    for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) {
-      int vop = VectorSupport::VECTOR_OP_MATH_START + op;
-      // Skip "tanh" because there is performance regression
-      if (vop == VectorSupport::VECTOR_OP_TANH) {
-        continue;
-      }
-
-      // The native library does not support u10 level of "hypot".
-      const char* ulf = (vop == VectorSupport::VECTOR_OP_HYPOT) ? "u05" : "u10";
-
-      snprintf(ebuf, sizeof(ebuf), "%sf4_%sadvsimd", VectorSupport::mathname[op], ulf);
-      StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libsleef, ebuf);
-
-      snprintf(ebuf, sizeof(ebuf), "%sf4_%sadvsimd", VectorSupport::mathname[op], ulf);
-      StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libsleef, ebuf);
-
-      snprintf(ebuf, sizeof(ebuf), "%sd2_%sadvsimd", VectorSupport::mathname[op], ulf);
-      StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libsleef, ebuf);
-    }
-  }
-
  // Initialization
  void generate_initial_stubs() {
    // Generate initial stubs and initializes the entry points
@@ -11392,8 +11319,6 @@ class StubGenerator: public StubCodeGenerator {
      StubRoutines::_montgomerySquare = g.generate_multiply();
    }

-    generate_vector_math_stubs();
-
 #endif // COMPILER2

    if (UseChaCha20Intrinsics) {
--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp
@@ -642,6 +642,7 @@ void VM_Version::initialize() {
  if (_model2) {
    os::snprintf_checked(buf + buf_used_len, sizeof(buf) - buf_used_len, "(0x%03x)", _model2);
  }
+  size_t features_offset = strnlen(buf, sizeof(buf));
 #define ADD_FEATURE_IF_SUPPORTED(id, name, bit)                 \
  do {                                                          \
    if (VM_Version::supports_##name()) strcat(buf, ", " #name); \
@@ -649,7 +650,11 @@ void VM_Version::initialize() {
  CPU_FEATURE_FLAGS(ADD_FEATURE_IF_SUPPORTED)
 #undef ADD_FEATURE_IF_SUPPORTED

-  _features_string = os::strdup(buf);
+  _cpu_info_string = os::strdup(buf);
+
+  _features_string = extract_features_string(_cpu_info_string,
+                                             strnlen(_cpu_info_string, sizeof(buf)),
+                                             features_offset);
 }

 #if defined(LINUX)
@@ -716,7 +721,7 @@ void VM_Version::initialize_cpu_information(void) {
  int desc_len = snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "AArch64 ");
  get_compatible_board(_cpu_desc + desc_len, CPU_DETAILED_DESC_BUF_SIZE - desc_len);
  desc_len = (int)strlen(_cpu_desc);
-  snprintf(_cpu_desc + desc_len, CPU_DETAILED_DESC_BUF_SIZE - desc_len, " %s", _features_string);
+  snprintf(_cpu_desc + desc_len, CPU_DETAILED_DESC_BUF_SIZE - desc_len, " %s", _cpu_info_string);

  _initialized = true;
 }
--- a/src/hotspot/cpu/arm/vm_version_arm_32.cpp
+++ b/src/hotspot/cpu/arm/vm_version_arm_32.cpp
@@ -295,7 +295,7 @@ void VM_Version::initialize() {
               (has_multiprocessing_extensions() ? ", mp_ext" : ""));

  // buf is started with ", " or is empty
-  _features_string = os::strdup(buf);
+  _cpu_info_string = os::strdup(buf);

  if (has_simd()) {
    if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
@@ -363,6 +363,6 @@ void VM_Version::initialize_cpu_information(void) {
  _no_of_threads = _no_of_cores;
  _no_of_sockets = _no_of_cores;
  snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "ARM%d", _arm_arch);
-  snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "%s", _features_string);
+  snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "%s", _cpu_info_string);
  _initialized = true;
 }
--- a/src/hotspot/cpu/ppc/vm_version_ppc.cpp
+++ b/src/hotspot/cpu/ppc/vm_version_ppc.cpp
@@ -219,7 +219,7 @@ void VM_Version::initialize() {
               (has_brw()     ? " brw"     : "")
               // Make sure number of %s matches num_features!
              );
-  _features_string = os::strdup(buf);
+  _cpu_info_string = os::strdup(buf);
  if (Verbose) {
    print_features();
  }
@@ -519,7 +519,7 @@ void VM_Version::print_platform_virtualization_info(outputStream* st) {
 }

 void VM_Version::print_features() {
-  tty->print_cr("Version: %s L1_data_cache_line_size=%d", features_string(), L1_data_cache_line_size());
+  tty->print_cr("Version: %s L1_data_cache_line_size=%d", cpu_info_string(), L1_data_cache_line_size());

  if (Verbose) {
    if (ContendedPaddingWidth > 0) {
@@ -726,6 +726,6 @@ void VM_Version::initialize_cpu_information(void) {
  _no_of_threads = _no_of_cores;
  _no_of_sockets = _no_of_cores;
  snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE, "PowerPC POWER%lu", PowerArchitecturePPC64);
-  snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "PPC %s", features_string());
+  snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "PPC %s", cpu_info_string());
  _initialized = true;
 }
--- a/src/hotspot/cpu/riscv/riscv.ad
+++ b/src/hotspot/cpu/riscv/riscv.ad
@@ -1950,11 +1950,11 @@ const RegMask* Matcher::predicate_reg_mask(void) {

 // Vector calling convention not yet implemented.
 bool Matcher::supports_vector_calling_convention(void) {
-  return EnableVectorSupport && UseVectorStubs;
+  return EnableVectorSupport;
 }

 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
-  assert(EnableVectorSupport && UseVectorStubs, "sanity");
+  assert(EnableVectorSupport, "sanity");
  assert(ideal_reg == Op_VecA, "sanity");
  // check more info at https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc
  int lo = V8_num;
--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
@@ -6458,58 +6458,6 @@ static const int64_t right_3_bits = right_n_bits(3);
    return start;
  }

-  void generate_vector_math_stubs() {
-    if (!UseRVV) {
-      log_info(library)("vector is not supported, skip loading vector math (sleef) library!");
-      return;
-    }
-
-    // Get native vector math stub routine addresses
-    void* libsleef = nullptr;
-    char ebuf[1024];
-    char dll_name[JVM_MAXPATHLEN];
-    if (os::dll_locate_lib(dll_name, sizeof(dll_name), Arguments::get_dll_dir(), "sleef")) {
-      libsleef = os::dll_load(dll_name, ebuf, sizeof ebuf);
-    }
-    if (libsleef == nullptr) {
-      log_info(library)("Failed to load native vector math (sleef) library, %s!", ebuf);
-      return;
-    }
-
-    // Method naming convention
-    //   All the methods are named as <OP><T>_<U><suffix>
-    //
-    //   Where:
-    //     <OP>     is the operation name, e.g. sin, cos
-    //     <T>      is to indicate float/double
-    //              "fx/dx" for vector float/double operation
-    //     <U>      is the precision level
-    //              "u10/u05" represents 1.0/0.5 ULP error bounds
-    //               We use "u10" for all operations by default
-    //               But for those functions do not have u10 support, we use "u05" instead
-    //     <suffix> rvv, indicates riscv vector extension
-    //
-    //   e.g. sinfx_u10rvv is the method for computing vector float sin using rvv instructions
-    //
-    log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "sleef" JNI_LIB_SUFFIX, p2i(libsleef));
-
-    for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) {
-      int vop = VectorSupport::VECTOR_OP_MATH_START + op;
-      if (vop == VectorSupport::VECTOR_OP_TANH) { // skip tanh because of performance regression
-        continue;
-      }
-
-      // The native library does not support u10 level of "hypot".
-      const char* ulf = (vop == VectorSupport::VECTOR_OP_HYPOT) ? "u05" : "u10";
-
-      snprintf(ebuf, sizeof(ebuf), "%sfx_%srvv", VectorSupport::mathname[op], ulf);
-      StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libsleef, ebuf);
-
-      snprintf(ebuf, sizeof(ebuf), "%sdx_%srvv", VectorSupport::mathname[op], ulf);
-      StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libsleef, ebuf);
-    }
-  }
-
 #endif // COMPILER2

  /**
@@ -6741,8 +6689,6 @@ static const int64_t right_3_bits = right_n_bits(3);

    generate_string_indexof_stubs();

-    generate_vector_math_stubs();
-
 #endif // COMPILER2
  }

--- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp
+++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp
@@ -468,7 +468,7 @@ void VM_Version::initialize_cpu_information(void) {
  _no_of_threads = _no_of_cores;
  _no_of_sockets = _no_of_cores;
  snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64");
-  snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", features_string());
+  snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", cpu_info_string());
  _initialized = true;
 }

--- a/src/hotspot/cpu/s390/vm_version_s390.cpp
+++ b/src/hotspot/cpu/s390/vm_version_s390.cpp
@@ -90,7 +90,7 @@ static const char* z_features[] = {"  ",

 void VM_Version::initialize() {
  determine_features();      // Get processor capabilities.
-  set_features_string();     // Set a descriptive feature indication.
+  set_cpu_info_string();     // Set a descriptive feature indication.

  if (Verbose || PrintAssembly || PrintStubCode) {
    print_features_internal("CPU Version as detected internally:", PrintAssembly || PrintStubCode);
@@ -388,9 +388,9 @@ int VM_Version::get_model_index() {
 }


-void VM_Version::set_features_string() {
-  // A note on the _features_string format:
-  //   There are jtreg tests checking the _features_string for various properties.
+void VM_Version::set_cpu_info_string() {
+  // A note on the _cpu_info_string format:
+  //   There are jtreg tests checking the _cpu_info_string for various properties.
  //   For some strange reason, these tests require the string to contain
  //   only _lowercase_ characters. Keep that in mind when being surprised
  //   about the unusual notation of features - and when adding new ones.
@@ -412,29 +412,29 @@ void VM_Version::set_features_string() {
    _model_string = "unknown model";
    strcpy(buf, "z/Architecture (ambiguous detection)");
  }
-  _features_string = os::strdup(buf);
+  _cpu_info_string = os::strdup(buf);

  if (has_Crypto_AES()) {
-    assert(strlen(_features_string) + 3*8 < sizeof(buf), "increase buffer size");
+    assert(strlen(_cpu_info_string) + 3*8 < sizeof(buf), "increase buffer size");
    jio_snprintf(buf, sizeof(buf), "%s%s%s%s",
-                 _features_string,
+                 _cpu_info_string,
                 has_Crypto_AES128() ? ", aes128" : "",
                 has_Crypto_AES192() ? ", aes192" : "",
                 has_Crypto_AES256() ? ", aes256" : "");
-    os::free((void *)_features_string);
-    _features_string = os::strdup(buf);
+    os::free((void *)_cpu_info_string);
+    _cpu_info_string = os::strdup(buf);
  }

  if (has_Crypto_SHA()) {
-    assert(strlen(_features_string) + 6 + 2*8 + 7 < sizeof(buf), "increase buffer size");
+    assert(strlen(_cpu_info_string) + 6 + 2*8 + 7 < sizeof(buf), "increase buffer size");
    jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s",
-                 _features_string,
+                 _cpu_info_string,
                 has_Crypto_SHA1()   ? ", sha1"   : "",
                 has_Crypto_SHA256() ? ", sha256" : "",
                 has_Crypto_SHA512() ? ", sha512" : "",
                 has_Crypto_GHASH()  ? ", ghash"  : "");
-    os::free((void *)_features_string);
-    _features_string = os::strdup(buf);
+    os::free((void *)_cpu_info_string);
+    _cpu_info_string = os::strdup(buf);
  }
 }

@@ -464,7 +464,7 @@ bool VM_Version::test_feature_bit(unsigned long* featureBuffer, int featureNum,
 }

 void VM_Version::print_features_internal(const char* text, bool print_anyway) {
-  tty->print_cr("%s %s", text, features_string());
+  tty->print_cr("%s %s", text, cpu_info_string());
  tty->cr();

  if (Verbose || print_anyway) {
@@ -906,7 +906,7 @@ void VM_Version::set_features_from(const char* march) {
      err = true;
    }
    if (!err) {
-      set_features_string();
+      set_cpu_info_string();
      if (prt || PrintAssembly) {
        print_features_internal("CPU Version as set by cmdline option:", prt);
      }
@@ -1542,6 +1542,6 @@ void VM_Version::initialize_cpu_information(void) {
  _no_of_threads = _no_of_cores;
  _no_of_sockets = _no_of_cores;
  snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE, "s390 %s", VM_Version::get_model_string());
-  snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "s390 %s", features_string());
+  snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "s390 %s", cpu_info_string());
  _initialized = true;
 }
--- a/src/hotspot/cpu/s390/vm_version_s390.hpp
+++ b/src/hotspot/cpu/s390/vm_version_s390.hpp
@@ -148,7 +148,7 @@ class VM_Version: public Abstract_VM_Version {

  static bool test_feature_bit(unsigned long* featureBuffer, int featureNum, unsigned int bufLen);
  static int  get_model_index();
-  static void set_features_string();
+  static void set_cpu_info_string();
  static void print_features_internal(const char* text, bool print_anyway=false);
  static void determine_features();
  static long call_getFeatures(unsigned long* buffer, int buflen, int functionCode);
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
@@ -4333,70 +4333,6 @@ void StubGenerator::generate_compiler_stubs() {
    }
  }

-  // Get svml stub routine addresses
-  void *libjsvml = nullptr;
-  char ebuf[1024];
-  char dll_name[JVM_MAXPATHLEN];
-  if (os::dll_locate_lib(dll_name, sizeof(dll_name), Arguments::get_dll_dir(), "jsvml")) {
-    libjsvml = os::dll_load(dll_name, ebuf, sizeof ebuf);
-  }
-  if (libjsvml != nullptr) {
-    // SVML method naming convention
-    //   All the methods are named as __jsvml_op<T><N>_ha_<VV>
-    //   Where:
-    //      ha stands for high accuracy
-    //      <T> is optional to indicate float/double
-    //              Set to f for vector float operation
-    //              Omitted for vector double operation
-    //      <N> is the number of elements in the vector
-    //              1, 2, 4, 8, 16
-    //              e.g. 128 bit float vector has 4 float elements
-    //      <VV> indicates the avx/sse level:
-    //              z0 is AVX512, l9 is AVX2, e9 is AVX1 and ex is for SSE2
-    //      e.g. __jsvml_expf16_ha_z0 is the method for computing 16 element vector float exp using AVX 512 insns
-    //           __jsvml_exp8_ha_z0 is the method for computing 8 element vector double exp using AVX 512 insns
-
-    log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "jsvml" JNI_LIB_SUFFIX, p2i(libjsvml));
-    if (UseAVX > 2) {
-      for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) {
-        int vop = VectorSupport::VECTOR_OP_MATH_START + op;
-        if ((!VM_Version::supports_avx512dq()) &&
-            (vop == VectorSupport::VECTOR_OP_LOG || vop == VectorSupport::VECTOR_OP_LOG10 || vop == VectorSupport::VECTOR_OP_POW)) {
-          continue;
-        }
-        snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf16_ha_z0", VectorSupport::mathname[op]);
-        StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_512][op] = (address)os::dll_lookup(libjsvml, ebuf);
-
-        snprintf(ebuf, sizeof(ebuf), "__jsvml_%s8_ha_z0", VectorSupport::mathname[op]);
-        StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_512][op] = (address)os::dll_lookup(libjsvml, ebuf);
-      }
-    }
-    const char* avx_sse_str = (UseAVX >= 2) ? "l9" : ((UseAVX == 1) ? "e9" : "ex");
-    for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) {
-      int vop = VectorSupport::VECTOR_OP_MATH_START + op;
-      if (vop == VectorSupport::VECTOR_OP_POW) {
-        continue;
-      }
-      snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::mathname[op], avx_sse_str);
-      StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libjsvml, ebuf);
-
-      snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::mathname[op], avx_sse_str);
-      StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libjsvml, ebuf);
-
-      snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf8_ha_%s", VectorSupport::mathname[op], avx_sse_str);
-      StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_256][op] = (address)os::dll_lookup(libjsvml, ebuf);
-
-      snprintf(ebuf, sizeof(ebuf), "__jsvml_%s1_ha_%s", VectorSupport::mathname[op], avx_sse_str);
-      StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libjsvml, ebuf);
-
-      snprintf(ebuf, sizeof(ebuf), "__jsvml_%s2_ha_%s", VectorSupport::mathname[op], avx_sse_str);
-      StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libjsvml, ebuf);
-
-      snprintf(ebuf, sizeof(ebuf), "__jsvml_%s4_ha_%s", VectorSupport::mathname[op], avx_sse_str);
-      StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_256][op] = (address)os::dll_lookup(libjsvml, ebuf);
-    }
-  }
-
 #endif // COMPILER2
 #endif // COMPILER2_OR_JVMCI
 }
--- a/src/hotspot/cpu/x86/vm_version_x86.cpp
+++ b/src/hotspot/cpu/x86/vm_version_x86.cpp
@@ -1080,15 +1080,19 @@ void VM_Version::get_processor_features() {
  }

  char buf[1024];
-  int res = jio_snprintf(
+  int cpu_info_size = jio_snprintf(
              buf, sizeof(buf),
              "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
              cores_per_cpu(), threads_per_core(),
              cpu_family(), _model, _stepping, os::cpu_microcode_revision());
-  assert(res > 0, "not enough temporary space allocated");
-  insert_features_names(buf + res, sizeof(buf) - res, _features_names);
+  assert(cpu_info_size > 0, "not enough temporary space allocated");
+  insert_features_names(buf + cpu_info_size, sizeof(buf) - cpu_info_size, _features_names);

-  _features_string = os::strdup(buf);
+  _cpu_info_string = os::strdup(buf);
+
+  _features_string = extract_features_string(_cpu_info_string,
+                                             strnlen(_cpu_info_string, sizeof(buf)),
+                                             cpu_info_size);

  // Use AES instructions if available.
  if (supports_aes()) {
--- a/src/hotspot/cpu/x86/x86_64.ad
+++ b/src/hotspot/cpu/x86/x86_64.ad
@@ -1596,14 +1596,11 @@ uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 //=============================================================================

 bool Matcher::supports_vector_calling_convention(void) {
-  if (EnableVectorSupport && UseVectorStubs) {
-    return true;
-  }
-  return false;
+  return EnableVectorSupport;
 }

 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
-  assert(EnableVectorSupport && UseVectorStubs, "sanity");
+  assert(EnableVectorSupport, "sanity");
  int lo = XMM0_num;
  int hi = XMM0b_num;
  if (ideal_reg == Op_VecX) hi = XMM0d_num;
--- a/src/hotspot/cpu/zero/vm_version_zero.cpp
+++ b/src/hotspot/cpu/zero/vm_version_zero.cpp
@@ -151,6 +151,6 @@ void VM_Version::initialize_cpu_information(void) {
  _no_of_threads = _no_of_cores;
  _no_of_sockets = _no_of_cores;
  snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "Zero VM");
-  snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "%s", _features_string);
+  snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "%s", _cpu_info_string);
  _initialized = true;
 }
--- a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp
+++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp
@@ -129,6 +129,9 @@ void VM_Version::setup_cpu_available_features() {
    snprintf(buf, sizeof(buf)/2, "%s ", uarch);
  }
  os::free((void*) uarch);
+
+  int features_offset = strnlen(buf, sizeof(buf));
+
  strcat(buf, "rv64");
  int i = 0;
  while (_feature_list[i] != nullptr) {
@@ -191,7 +194,9 @@ void VM_Version::setup_cpu_available_features() {
    }
  }

-  _features_string = os::strdup(buf);
+  _cpu_info_string = os::strdup(buf);
+
+  _features_string = _cpu_info_string + features_offset;
 }

 void VM_Version::os_aux_features() {
--- a/src/hotspot/share/ci/ciInstance.cpp
+++ b/src/hotspot/share/ci/ciInstance.cpp
@@ -138,3 +138,9 @@ ciKlass* ciInstance::java_lang_Class_klass() {
  assert(java_lang_Class::as_Klass(get_oop()) != nullptr, "klass is null");
  return CURRENT_ENV->get_metadata(java_lang_Class::as_Klass(get_oop()))->as_klass();
 }
+
+char* ciInstance::java_lang_String_str(char* buf, size_t buflen) {
+  VM_ENTRY_MARK;
+  assert(get_oop()->is_a(vmClasses::String_klass()), "not a String");
+  return java_lang_String::as_utf8_string(get_oop(), buf, buflen);
+}
--- a/src/hotspot/share/ci/ciInstance.hpp
+++ b/src/hotspot/share/ci/ciInstance.hpp
@@ -67,6 +67,7 @@ public:
  ciConstant field_value_by_offset(int field_offset);

  ciKlass* java_lang_Class_klass();
+  char* java_lang_String_str(char* buf, size_t buflen);
 };

 #endif // SHARE_CI_CIINSTANCE_HPP
--- a/src/hotspot/share/classfile/modules.cpp
+++ b/src/hotspot/share/classfile/modules.cpp
@@ -466,13 +466,9 @@ void Modules::define_module(Handle module, jboolean is_open, jstring version,
    if (EnableVectorSupport && EnableVectorReboxing && FLAG_IS_DEFAULT(EnableVectorAggressiveReboxing)) {
      FLAG_SET_DEFAULT(EnableVectorAggressiveReboxing, true);
    }
-    if (EnableVectorSupport && FLAG_IS_DEFAULT(UseVectorStubs)) {
-      FLAG_SET_DEFAULT(UseVectorStubs, true);
-    }
    log_info(compilation)("EnableVectorSupport=%s",            (EnableVectorSupport            ? "true" : "false"));
    log_info(compilation)("EnableVectorReboxing=%s",           (EnableVectorReboxing           ? "true" : "false"));
    log_info(compilation)("EnableVectorAggressiveReboxing=%s", (EnableVectorAggressiveReboxing ? "true" : "false"));
-    log_info(compilation)("UseVectorStubs=%s",                 (UseVectorStubs                 ? "true" : "false"));
  }
 #endif // COMPILER2_OR_JVMCI
 }
--- a/src/hotspot/share/classfile/vmIntrinsics.hpp
+++ b/src/hotspot/share/classfile/vmIntrinsics.hpp
@@ -1001,7 +1001,8 @@ class methodHandle;
  do_intrinsic(_VectorUnaryOp, jdk_internal_vm_vector_VectorSupport, vector_unary_op_name, vector_unary_op_sig, F_S)                           \
   do_signature(vector_unary_op_sig, "(I"                                                                                                      \
                                      "Ljava/lang/Class;"                                                                                      \
-                                      "Ljava/lang/Class;Ljava/lang/Class;"                                                                     \
+                                      "Ljava/lang/Class;"                                                                                      \
+                                      "Ljava/lang/Class;"                                                                                      \
                                      "I"                                                                                                      \
                                      "Ljdk/internal/vm/vector/VectorSupport$Vector;"                                                          \
                                      "Ljdk/internal/vm/vector/VectorSupport$VectorMask;"                                                      \
@@ -1022,6 +1023,29 @@ class methodHandle;
                                       "Ljdk/internal/vm/vector/VectorSupport$VectorPayload;")                                                 \
   do_name(vector_binary_op_name,     "binaryOp")                                                                                              \
                                                                                                                                               \
+  do_intrinsic(_VectorUnaryLibOp, jdk_internal_vm_vector_VectorSupport, vector_unary_lib_op_name, vector_unary_lib_op_sig, F_S)                \
+   do_signature(vector_unary_lib_op_sig,"(J"                                                                                                   \
+                                         "Ljava/lang/Class;"                                                                                   \
+                                         "Ljava/lang/Class;"                                                                                   \
+                                         "I"                                                                                                   \
+                                         "Ljava/lang/String;"                                                                                  \
+                                         "Ljdk/internal/vm/vector/VectorSupport$Vector;"                                                       \
+                                         "Ljdk/internal/vm/vector/VectorSupport$UnaryOperation;)"                                              \
+                                         "Ljdk/internal/vm/vector/VectorSupport$Vector;")                                                      \
+   do_name(vector_unary_lib_op_name, "libraryUnaryOp")                                                                                         \
+                                                                                                                                               \
+  do_intrinsic(_VectorBinaryLibOp, jdk_internal_vm_vector_VectorSupport, vector_binary_lib_op_name, vector_binary_lib_op_sig, F_S)             \
+   do_signature(vector_binary_lib_op_sig,"(J"                                                                                                  \
+                                          "Ljava/lang/Class;"                                                                                  \
+                                          "Ljava/lang/Class;"                                                                                  \
+                                          "I"                                                                                                  \
+                                          "Ljava/lang/String;"                                                                                 \
+                                          "Ljdk/internal/vm/vector/VectorSupport$VectorPayload;"                                               \
+                                          "Ljdk/internal/vm/vector/VectorSupport$VectorPayload;"                                               \
+                                          "Ljdk/internal/vm/vector/VectorSupport$BinaryOperation;)"                                            \
+                                          "Ljdk/internal/vm/vector/VectorSupport$VectorPayload;")                                              \
+   do_name(vector_binary_lib_op_name, "libraryBinaryOp")                                                                                       \
+                                                                                                                                               \
  do_intrinsic(_VectorTernaryOp, jdk_internal_vm_vector_VectorSupport, vector_ternary_op_name, vector_ternary_op_sig, F_S)                     \
   do_signature(vector_ternary_op_sig, "(I"                                                                                                    \
                                        "Ljava/lang/Class;"                                                                                    \
--- a/src/hotspot/share/jvmci/jvmci_globals.cpp
+++ b/src/hotspot/share/jvmci/jvmci_globals.cpp
@@ -144,8 +144,9 @@ bool JVMCIGlobals::check_jvmci_flags_are_consistent() {
  JVMCI_FLAG_CHECKED(UseMulAddIntrinsic)
  JVMCI_FLAG_CHECKED(UseMontgomeryMultiplyIntrinsic)
  JVMCI_FLAG_CHECKED(UseMontgomerySquareIntrinsic)
-  JVMCI_FLAG_CHECKED(UseVectorStubs)
 #endif // !COMPILER2
+       //
+  JVMCI_FLAG_CHECKED(UseVectorStubs)

 #ifndef PRODUCT
 #define JVMCI_CHECK4(type, name, value, ...) assert(name##checked, #name " flag not checked");
--- a/src/hotspot/share/jvmci/jvmci_globals.hpp
+++ b/src/hotspot/share/jvmci/jvmci_globals.hpp
@ -184,8 +184,8 @@ class fileStream;
  NOT_COMPILER2(product(bool, EnableVectorAggressiveReboxing, false, EXPERIMENTAL, \
          "Enables aggressive reboxing of vectors"))                        \
                                                                            \
-  NOT_COMPILER2(product(bool, UseVectorStubs, false, EXPERIMENTAL,          \
-          "Use stubs for vector transcendental operations"))                \
+  product(bool, UseVectorStubs, false, EXPERIMENTAL,                        \
+          "Use stubs for vector transcendental operations")                 \

 // end of JVMCI_FLAGS

--- a/src/hotspot/share/opto/c2_globals.hpp
+++ b/src/hotspot/share/opto/c2_globals.hpp
@ -760,9 +760,6 @@
  product(bool, EnableVectorAggressiveReboxing, false, EXPERIMENTAL,        \
          "Enables aggressive reboxing of vectors")                         \
                                                                            \
-  product(bool, UseVectorStubs, false, EXPERIMENTAL,                        \
-          "Use stubs for vector transcendental operations")                 \
-                                                                            \
  product(bool, UseTypeSpeculation, true,                                   \
          "Speculatively propagate types from profiles")                    \
                                                                            \
--- a/src/hotspot/share/opto/c2compiler.cpp
+++ b/src/hotspot/share/opto/c2compiler.cpp
@ -848,6 +848,9 @@ bool C2Compiler::is_intrinsic_supported(vmIntrinsics::ID id) {
  case vmIntrinsics::_IndexVector:
  case vmIntrinsics::_IndexPartiallyInUpperRange:
    return EnableVectorSupport;
+  case vmIntrinsics::_VectorUnaryLibOp:
+  case vmIntrinsics::_VectorBinaryLibOp:
+    return EnableVectorSupport && Matcher::supports_vector_calling_convention();
  case vmIntrinsics::_blackhole:
 #if INCLUDE_JVMTI
  case vmIntrinsics::_notifyJvmtiVThreadStart:
--- a/src/hotspot/share/opto/library_call.cpp
+++ b/src/hotspot/share/opto/library_call.cpp
@ -721,6 +721,10 @@ bool LibraryCallKit::try_to_inline(int predicate) {
    return inline_vector_nary_operation(1);
  case vmIntrinsics::_VectorBinaryOp:
    return inline_vector_nary_operation(2);
+  case vmIntrinsics::_VectorUnaryLibOp:
+    return inline_vector_call(1);
+  case vmIntrinsics::_VectorBinaryLibOp:
+    return inline_vector_call(2);
  case vmIntrinsics::_VectorTernaryOp:
    return inline_vector_nary_operation(3);
  case vmIntrinsics::_VectorFromBitsCoerced:
--- a/src/hotspot/share/opto/library_call.hpp
+++ b/src/hotspot/share/opto/library_call.hpp
@ -369,6 +369,7 @@ class LibraryCallKit : public GraphKit {

  // Vector API support
  bool inline_vector_nary_operation(int n);
+  bool inline_vector_call(int arity);
  bool inline_vector_frombits_coerced();
  bool inline_vector_mask_operation();
  bool inline_vector_mem_operation(bool is_store);
--- a/src/hotspot/share/opto/vectorIntrinsics.cpp
+++ b/src/hotspot/share/opto/vectorIntrinsics.cpp
@ -366,17 +366,11 @@ bool LibraryCallKit::inline_vector_nary_operation(int n) {
  int num_elem = vlen->get_con();
  int opc = VectorSupport::vop2ideal(opr->get_con(), elem_bt);
  int sopc = has_scalar_op ? VectorNode::opcode(opc, elem_bt) : opc;
-  if ((opc != Op_CallLeafVector) && (sopc == 0)) {
-    log_if_needed("  ** operation not supported: opc=%s bt=%s", NodeClassNames[opc], type2name(elem_bt));
+  if (sopc == 0 || num_elem == 1) {
+    log_if_needed("  ** operation not supported: arity=%d opc=%s[%d] vlen=%d etype=%s",
+                    n, NodeClassNames[opc], opc, num_elem, type2name(elem_bt));
    return false; // operation not supported
  }
-  if (num_elem == 1) {
-    if (opc != Op_CallLeafVector || elem_bt != T_DOUBLE) {
-      log_if_needed("  ** not a svml call: arity=%d opc=%d vlen=%d etype=%s",
-                      n, opc, num_elem, type2name(elem_bt));
-      return false;
-    }
-  }
  ciKlass* vbox_klass = vector_klass->const_oop()->as_instance()->java_lang_Class_klass();
  const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);

@ -384,22 +378,6 @@ bool LibraryCallKit::inline_vector_nary_operation(int n) {
    assert(!is_masked_op, "mask operations do not need mask to control");
  }

-  if (opc == Op_CallLeafVector) {
-    if (!UseVectorStubs) {
-      log_if_needed("  ** vector stubs support is disabled");
-      return false;
-    }
-    if (!Matcher::supports_vector_calling_convention()) {
-      log_if_needed("  ** no vector calling conventions supported");
-      return false;
-    }
-    if (!Matcher::vector_size_supported(elem_bt, num_elem)) {
-      log_if_needed("  ** vector size (vlen=%d, etype=%s) is not supported",
-                      num_elem, type2name(elem_bt));
-      return false;
-    }
-  }
-
  // When using mask, mask use type needs to be VecMaskUseLoad.
  VectorMaskUseType mask_use_type = is_vector_mask(vbox_klass) ? VecMaskUseAll
                                      : is_masked_op ? VecMaskUseLoad : VecMaskNotUsed;
@ -464,30 +442,18 @@ bool LibraryCallKit::inline_vector_nary_operation(int n) {
  }

  Node* operation = nullptr;
-  if (opc == Op_CallLeafVector) {
-    assert(UseVectorStubs, "sanity");
-    operation = gen_call_to_vector_math(opr->get_con(), elem_bt, num_elem, opd1, opd2);
-    if (operation == nullptr) {
-      log_if_needed("  ** Vector math call failed for %s_%s_%d",
-                         (elem_bt == T_FLOAT) ? "float" : "double",
-                         VectorSupport::mathname[opr->get_con() - VectorSupport::VECTOR_OP_MATH_START],
-                         num_elem * type2aelembytes(elem_bt));
-      return false;
-     }
-  } else {
-    const TypeVect* vt = TypeVect::make(elem_bt, num_elem, is_vector_mask(vbox_klass));
-    switch (n) {
-      case 1:
-      case 2: {
-        operation = VectorNode::make(sopc, opd1, opd2, vt, is_vector_mask(vbox_klass), VectorNode::is_shift_opcode(opc), is_unsigned);
-        break;
-      }
-      case 3: {
-        operation = VectorNode::make(sopc, opd1, opd2, opd3, vt);
-        break;
-      }
-      default: fatal("unsupported arity: %d", n);
+  const TypeVect* vt = TypeVect::make(elem_bt, num_elem, is_vector_mask(vbox_klass));
+  switch (n) {
+    case 1:
+    case 2: {
+      operation = VectorNode::make(sopc, opd1, opd2, vt, is_vector_mask(vbox_klass), VectorNode::is_shift_opcode(opc), is_unsigned);
+      break;
    }
+    case 3: {
+      operation = VectorNode::make(sopc, opd1, opd2, opd3, vt);
+      break;
+    }
+    default: fatal("unsupported arity: %d", n);
  }

  if (is_masked_op && mask != nullptr) {
@ -510,6 +476,107 @@ bool LibraryCallKit::inline_vector_nary_operation(int n) {
 }

 // public static
+// <V extends Vector<E>, E>
+// V libraryUnaryOp(long addr, Class<? extends V> vClass, Class<E> eClass, int length, String debugName,
+//                  V v,
+//                  UnaryOperation<V, ?> defaultImpl)
+//
+// public static
+// <V extends VectorPayload, E>
+// V libraryBinaryOp(long addr, Class<? extends V> vClass, Class<E> eClass, int length, String debugName,
+//                   V v1, V v2,
+//                   BinaryOperation<V, ?> defaultImpl)
+//
+// Intrinsic for VectorSupport.libraryUnaryOp (arity == 1) and
+// VectorSupport.libraryBinaryOp (arity == 2).
+//
+// Replaces the Java call with an RC_VECTOR runtime call to the native entry
+// point passed in 'addr', applied to the unboxed vector operand(s), and boxes
+// the returned vector back into 'vClass'.
+//
+// Returns false (the call is not intrinsified) when:
+//   - addr, vClass, eClass, length or debugName is not a compile-time constant;
+//   - addr is 0;
+//   - eClass is not a primitive type, or vClass is not initialized;
+//   - the (element type, length) vector shape is not supported by the matcher;
+//   - an operand cannot be unboxed.
+bool LibraryCallKit::inline_vector_call(int arity) {
+  assert(Matcher::supports_vector_calling_convention(), "required");
+  // Validate arity before it is used to decide which arguments are unboxed.
+  assert(arity == 1 || arity == 2, "arity %d not supported", arity);
+
+  // 'addr' is a long and takes argument slots 0 and 1, so vClass starts at slot 2.
+  const TypeLong*    entry          = gvn().type(argument(0))->isa_long();
+  const TypeInstPtr* vector_klass   = gvn().type(argument(2))->isa_instptr();
+  const TypeInstPtr* elem_klass     = gvn().type(argument(3))->isa_instptr();
+  const TypeInt*     vlen           = gvn().type(argument(4))->isa_int();
+  const TypeInstPtr* debug_name_oop = gvn().type(argument(5))->isa_instptr();
+
+  if (entry        == nullptr   || !entry->is_con() ||
+      vector_klass == nullptr   || vector_klass->const_oop() == nullptr ||
+      elem_klass   == nullptr   || elem_klass->const_oop() == nullptr ||
+      vlen         == nullptr   || !vlen->is_con() ||
+      debug_name_oop == nullptr || debug_name_oop->const_oop() == nullptr) {
+    log_if_needed("  ** missing constant: entry=%s vclass=%s etype=%s vlen=%s debug_name=%s",
+                  NodeClassNames[argument(0)->Opcode()],
+                  NodeClassNames[argument(2)->Opcode()],
+                  NodeClassNames[argument(3)->Opcode()],
+                  NodeClassNames[argument(4)->Opcode()],
+                  NodeClassNames[argument(5)->Opcode()]);
+    return false; // not enough info for intrinsification
+  }
+
+  // A zero address means there is no native routine to call.
+  if (entry->get_con() == 0) {
+    log_if_needed("  ** missing entry point");
+    return false;
+  }
+
+  ciType* elem_type = elem_klass->const_oop()->as_instance()->java_mirror_type();
+  if (!elem_type->is_primitive_type()) {
+    log_if_needed("  ** not a primitive bt=%d", elem_type->basic_type());
+    return false; // should be primitive type
+  }
+  if (!is_klass_initialized(vector_klass)) {
+    log_if_needed("  ** klass argument not initialized");
+    return false;
+  }
+
+  BasicType elem_bt = elem_type->basic_type();
+  int num_elem = vlen->get_con();
+  if (!Matcher::vector_size_supported(elem_bt, num_elem)) {
+    log_if_needed("  ** vector size (vlen=%d, etype=%s) is not supported",
+                  num_elem, type2name(elem_bt));
+    return false;
+  }
+
+  ciKlass* vbox_klass = vector_klass->const_oop()->as_instance()->java_lang_Class_klass();
+  const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);
+
+  // Unbox the operand(s); both operands share the result's box type and shape.
+  Node* opd1 = unbox_vector(argument(6), vbox_type, elem_bt, num_elem);
+  if (opd1 == nullptr) {
+    log_if_needed("  ** unbox failed v1=%s", NodeClassNames[argument(6)->Opcode()]);
+    return false;
+  }
+
+  Node* opd2 = nullptr;
+  if (arity > 1) {
+    opd2 = unbox_vector(argument(7), vbox_type, elem_bt, num_elem);
+    if (opd2 == nullptr) {
+      log_if_needed("  ** unbox failed v2=%s", NodeClassNames[argument(7)->Opcode()]);
+      return false;
+    }
+  }
+  // The callee takes 'arity' vectors of type vt and returns a vector of type vt.
+  const TypeVect* vt = TypeVect::make(elem_bt, num_elem);
+  const TypeFunc* call_type = OptoRuntime::Math_Vector_Vector_Type(arity, vt, vt);
+  address entry_addr = (address)entry->get_con();
+
+  // Name attached to the runtime call; copied out of the constant String into
+  // a buffer allocated from the compilation arena.
+  const char* debug_name = "<unknown>";
+  if (!debug_name_oop->const_oop()->is_null_object()) {
+    size_t buflen = 100;
+    char* buf = NEW_ARENA_ARRAY(C->comp_arena(), char, buflen);
+    debug_name = debug_name_oop->const_oop()->as_instance()->java_lang_String_str(buf, buflen);
+  }
+  Node* vcall = make_runtime_call(RC_VECTOR,
+                                  call_type,
+                                  entry_addr,
+                                  debug_name,
+                                  TypePtr::BOTTOM,
+                                  opd1,
+                                  opd2);
+
+  // The vector result is the call's first value projection.
+  vcall = gvn().transform(new ProjNode(gvn().transform(vcall), TypeFunc::Parms));
+
+  // Wrap it up in VectorBox to keep object type information.
+  Node* vbox = box_vector(vcall, vbox_type, elem_bt, num_elem);
+  set_result(vbox);
+  C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
+  return true;
+}
+
 // <E, M>
 // long maskReductionCoerced(int oper, Class<? extends M> maskClass, Class<?> elemClass,
 //                          int length, M m, VectorMaskOp<M> defaultImpl)
@ -1844,50 +1911,6 @@ bool LibraryCallKit::inline_vector_rearrange() {
  return true;
 }

-static address get_vector_math_address(int vop, int bits, BasicType bt, char* name_ptr, int name_len) {
-  address addr = nullptr;
-  assert(UseVectorStubs, "sanity");
-  assert(name_ptr != nullptr, "unexpected");
-  assert((vop >= VectorSupport::VECTOR_OP_MATH_START) && (vop <= VectorSupport::VECTOR_OP_MATH_END), "unexpected");
-  int op = vop - VectorSupport::VECTOR_OP_MATH_START;
-
-  switch(bits) {
-    case 64:  //fallthough
-    case 128: //fallthough
-    case 256: //fallthough
-    case 512:
-      if (bt == T_FLOAT) {
-        snprintf(name_ptr, name_len, "vector_%s_float_%dbits_fixed", VectorSupport::mathname[op], bits);
-        addr = StubRoutines::_vector_f_math[exact_log2(bits/64)][op];
-      } else {
-        assert(bt == T_DOUBLE, "must be FP type only");
-        snprintf(name_ptr, name_len, "vector_%s_double_%dbits_fixed", VectorSupport::mathname[op], bits);
-        addr = StubRoutines::_vector_d_math[exact_log2(bits/64)][op];
-      }
-      break;
-    default:
-      if (!Matcher::supports_scalable_vector() || !Matcher::vector_size_supported(bt, bits/type2aelembytes(bt)) ) {
-        snprintf(name_ptr, name_len, "invalid");
-        addr = nullptr;
-        Unimplemented();
-      }
-      break;
-  }
-
-  if (addr == nullptr && Matcher::supports_scalable_vector()) {
-    if (bt == T_FLOAT) {
-      snprintf(name_ptr, name_len, "vector_%s_float_%dbits_scalable", VectorSupport::mathname[op], bits);
-      addr = StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_SCALABLE][op];
-    } else {
-      assert(bt == T_DOUBLE, "must be FP type only");
-      snprintf(name_ptr, name_len, "vector_%s_double_%dbits_scalable", VectorSupport::mathname[op], bits);
-      addr = StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_SCALABLE][op];
-    }
-  }
-
-  return addr;
-}
-
 //    public static
 //    <V extends Vector<E>,
 //     M  extends VectorMask<E>,
@ -2044,32 +2067,6 @@ bool LibraryCallKit::inline_vector_select_from() {
  return true;
 }

-Node* LibraryCallKit::gen_call_to_vector_math(int vector_api_op_id, BasicType bt, int num_elem, Node* opd1, Node* opd2) {
-  assert(UseVectorStubs, "sanity");
-  assert(vector_api_op_id >= VectorSupport::VECTOR_OP_MATH_START && vector_api_op_id <= VectorSupport::VECTOR_OP_MATH_END, "need valid op id");
-  assert(opd1 != nullptr, "must not be null");
-  const TypeVect* vt = TypeVect::make(bt, num_elem);
-  const TypeFunc* call_type = OptoRuntime::Math_Vector_Vector_Type(opd2 != nullptr ? 2 : 1, vt, vt);
-  char name[100] = "";
-
-  // Get address for vector math method.
-  address addr = get_vector_math_address(vector_api_op_id, vt->length_in_bytes() * BitsPerByte, bt, name, 100);
-
-  if (addr == nullptr) {
-    return nullptr;
-  }
-
-  assert(name[0] != '\0', "name must not be null");
-  Node* operation = make_runtime_call(RC_VECTOR,
-                                      call_type,
-                                      addr,
-                                      name,
-                                      TypePtr::BOTTOM,
-                                      opd1,
-                                      opd2);
-  return gvn().transform(new ProjNode(gvn().transform(operation), TypeFunc::Parms));
-}
-
 //  public static
 //  <V extends Vector<E>,
 //   M extends VectorMask<E>,
--- a/src/hotspot/share/prims/vectorSupport.cpp
+++ b/src/hotspot/share/prims/vectorSupport.cpp
@ -39,31 +39,9 @@
 #include "runtime/stackValue.hpp"
 #ifdef COMPILER2
 #include "opto/matcher.hpp"
+#include "opto/vectornode.hpp"
 #endif // COMPILER2

-#ifdef COMPILER2
-const char* VectorSupport::mathname[VectorSupport::NUM_VECTOR_OP_MATH] = {
-    "tan",
-    "tanh",
-    "sin",
-    "sinh",
-    "cos",
-    "cosh",
-    "asin",
-    "acos",
-    "atan",
-    "atan2",
-    "cbrt",
-    "log",
-    "log10",
-    "log1p",
-    "pow",
-    "exp",
-    "expm1",
-    "hypot",
-};
-#endif
-
 bool VectorSupport::is_vector(Klass* klass) {
  return klass->is_subclass_of(vmClasses::vector_VectorPayload_klass());
 }
@ -615,25 +593,6 @@ int VectorSupport::vop2ideal(jint id, BasicType bt) {
      break;
    }

-    case VECTOR_OP_TAN:
-    case VECTOR_OP_TANH:
-    case VECTOR_OP_SIN:
-    case VECTOR_OP_SINH:
-    case VECTOR_OP_COS:
-    case VECTOR_OP_COSH:
-    case VECTOR_OP_ASIN:
-    case VECTOR_OP_ACOS:
-    case VECTOR_OP_ATAN:
-    case VECTOR_OP_ATAN2:
-    case VECTOR_OP_CBRT:
-    case VECTOR_OP_LOG:
-    case VECTOR_OP_LOG10:
-    case VECTOR_OP_LOG1P:
-    case VECTOR_OP_POW:
-    case VECTOR_OP_EXP:
-    case VECTOR_OP_EXPM1:
-    case VECTOR_OP_HYPOT:
-      return Op_CallLeafVector;
    default: fatal("unknown op: %d", vop);
  }
  return 0; // Unimplemented
@ -655,16 +614,26 @@ JVM_ENTRY(jint, VectorSupport_GetMaxLaneCount(JNIEnv *env, jclass vsclazz, jobje
  return -1;
 } JVM_END

+// Native implementation of VectorSupport.getCPUFeatures() (registered in the
+// JNINativeMethod table below). Returns VM_Version::features_string() as a
+// java.lang.String, handed back to the caller through a local JNI handle.
+JVM_ENTRY(jstring, VectorSupport_GetCPUFeatures(JNIEnv* env, jclass ignored))
+  const char* features_string = VM_Version::features_string();
+  assert(features_string != nullptr, "missing cpu features info");
+
+  // CHECK_NULL: return null to the caller if creating the String raised an exception.
+  oop result = java_lang_String::create_oop_from_str(features_string, CHECK_NULL);
+  return (jstring) JNIHandles::make_local(THREAD, result);
+JVM_END
+
 // JVM_RegisterVectorSupportMethods

 #define LANG "Ljava/lang/"
 #define CLS LANG "Class;"
+#define LSTR LANG "String;"

 #define CC (char*)  /*cast a literal from (const char*)*/
 #define FN_PTR(f) CAST_FROM_FN_PTR(void*, &f)

 static JNINativeMethod jdk_internal_vm_vector_VectorSupport_methods[] = {
-    {CC "getMaxLaneCount",   CC "(" CLS ")I", FN_PTR(VectorSupport_GetMaxLaneCount)}
+    {CC "getMaxLaneCount", CC "(" CLS ")I", FN_PTR(VectorSupport_GetMaxLaneCount)},
+    {CC "getCPUFeatures",  CC "()" LSTR,    FN_PTR(VectorSupport_GetCPUFeatures)}
 };

 #undef CC
@ -672,6 +641,7 @@ static JNINativeMethod jdk_internal_vm_vector_VectorSupport_methods[] = {

 #undef LANG
 #undef CLS
+#undef LSTR

 // This function is exported, used by NativeLookup.

--- a/src/hotspot/share/prims/vectorSupport.hpp
+++ b/src/hotspot/share/prims/vectorSupport.hpp
@ -101,36 +101,12 @@ class VectorSupport : AllStatic {
    VECTOR_OP_COMPRESS_BITS = 33,
    VECTOR_OP_EXPAND_BITS = 34,

-    // Vector Math Library
-    VECTOR_OP_TAN   = 101,
-    VECTOR_OP_TANH  = 102,
-    VECTOR_OP_SIN   = 103,
-    VECTOR_OP_SINH  = 104,
-    VECTOR_OP_COS   = 105,
-    VECTOR_OP_COSH  = 106,
-    VECTOR_OP_ASIN  = 107,
-    VECTOR_OP_ACOS  = 108,
-    VECTOR_OP_ATAN  = 109,
-    VECTOR_OP_ATAN2 = 110,
-    VECTOR_OP_CBRT  = 111,
-    VECTOR_OP_LOG   = 112,
-    VECTOR_OP_LOG10 = 113,
-    VECTOR_OP_LOG1P = 114,
-    VECTOR_OP_POW   = 115,
-    VECTOR_OP_EXP   = 116,
-    VECTOR_OP_EXPM1 = 117,
-    VECTOR_OP_HYPOT = 118,
-
    VECTOR_OP_SADD  = 119,
    VECTOR_OP_SSUB  = 120,
    VECTOR_OP_SUADD = 121,
    VECTOR_OP_SUSUB = 122,
    VECTOR_OP_UMIN  = 123,
    VECTOR_OP_UMAX  = 124,
-
-    VECTOR_OP_MATH_START = VECTOR_OP_TAN,
-    VECTOR_OP_MATH_END   = VECTOR_OP_HYPOT,
-    NUM_VECTOR_OP_MATH   = VECTOR_OP_MATH_END - VECTOR_OP_MATH_START + 1
  };

  enum {
@ -147,8 +123,6 @@ class VectorSupport : AllStatic {
    MODE_BITS_COERCED_LONG_TO_MASK = 1
  };

-  static const char* mathname[VectorSupport::NUM_VECTOR_OP_MATH];
-
  static int vop2ideal(jint vop, BasicType bt);
  static bool has_scalar_op(jint id);
  static bool is_unsigned_op(jint id);
--- a/src/hotspot/share/prims/whitebox.cpp
+++ b/src/hotspot/share/prims/whitebox.cpp
@ -1533,7 +1533,7 @@ WB_ENTRY(void, WB_ReadReservedMemory(JNIEnv* env, jobject o))
 WB_END

 WB_ENTRY(jstring, WB_GetCPUFeatures(JNIEnv* env, jobject o))
-  const char* features = VM_Version::features_string();
+  const char* features = VM_Version::cpu_info_string();
  ThreadToNativeFromVM ttn(thread);
  jstring features_string = env->NewStringUTF(features);

--- a/src/hotspot/share/runtime/abstract_vm_version.cpp
+++ b/src/hotspot/share/runtime/abstract_vm_version.cpp
@ -34,6 +34,7 @@ const char* Abstract_VM_Version::_s_internal_vm_info_string = Abstract_VM_Versio

 uint64_t Abstract_VM_Version::_features = 0;
 const char* Abstract_VM_Version::_features_string = "";
+const char* Abstract_VM_Version::_cpu_info_string = "";
 uint64_t Abstract_VM_Version::_cpu_features = 0;

 #ifndef SUPPORTS_NATIVE_CX8
@ -340,6 +341,19 @@ void Abstract_VM_Version::insert_features_names(char* buf, size_t buflen, const
  }
 }

+// Returns the features-list portion of a CPU info string.
+//
+//   cpu_info_string      the full CPU info string
+//   cpu_info_string_len  its length
+//   features_offset      index at which the ", " separator that precedes the
+//                        features list is expected (checked by asserts only)
+//
+// When features_offset is less than the length, the result points into
+// cpu_info_string just past the two separator characters; no copy is made.
+// When features_offset equals the length there is no features list and an
+// empty string literal is returned.
+const char* Abstract_VM_Version::extract_features_string(const char* cpu_info_string,
+                                                         size_t cpu_info_string_len,
+                                                         size_t features_offset) {
+  assert(features_offset <= cpu_info_string_len, "");
+  if (features_offset < cpu_info_string_len) {
+    assert(cpu_info_string[features_offset + 0] == ',', "");
+    assert(cpu_info_string[features_offset + 1] == ' ', "");
+    return cpu_info_string + features_offset + 2; // skip initial ", "
+  } else {
+    return ""; // empty
+  }
+}
+
 bool Abstract_VM_Version::print_matching_lines_from_file(const char* filename, outputStream* st, const char* keywords_to_match[]) {
  char line[500];
  FILE* fp = os::fopen(filename, "r");
--- a/src/hotspot/share/runtime/abstract_vm_version.hpp
+++ b/src/hotspot/share/runtime/abstract_vm_version.hpp
@ -58,6 +58,8 @@ class Abstract_VM_Version: AllStatic {
  static uint64_t _features;
  static const char* _features_string;

+  static const char* _cpu_info_string;
+
  // Original CPU feature flags, not affected by VM settings.
  static uint64_t _cpu_features;

@ -128,7 +130,11 @@ class Abstract_VM_Version: AllStatic {

  static uint64_t features()           { return _features; }
  static const char* features_string() { return _features_string; }
+  static const char* cpu_info_string() { return _cpu_info_string; }
  static void insert_features_names(char* buf, size_t buflen, const char* features_names[]);
+  static const char* extract_features_string(const char* cpu_info_string,
+                                             size_t cpu_info_string_len,
+                                             size_t features_offset);

  static VirtualizationType get_detected_virtualization() {
    return _detected_virtualization;
--- a/src/hotspot/share/runtime/arguments.cpp
+++ b/src/hotspot/share/runtime/arguments.cpp
@ -3786,11 +3786,6 @@ jint Arguments::apply_ergo() {
      }
    }
    FLAG_SET_DEFAULT(EnableVectorAggressiveReboxing, false);
-
-    if (!FLAG_IS_DEFAULT(UseVectorStubs) && UseVectorStubs) {
-      warning("Disabling UseVectorStubs since EnableVectorSupport is turned off.");
-    }
-    FLAG_SET_DEFAULT(UseVectorStubs, false);
  }
 #endif // COMPILER2_OR_JVMCI

--- a/src/hotspot/share/runtime/os.cpp
+++ b/src/hotspot/share/runtime/os.cpp
@ -1168,7 +1168,7 @@ void os::print_cpu_info(outputStream* st, char* buf, size_t buflen) {
  // We access the raw value here because the assert in the accessor will
  // fail if the crash occurs before initialization of this value.
  st->print(" (initial active %d)", _initial_active_processor_count);
-  st->print(" %s", VM_Version::features_string());
+  st->print(" %s", VM_Version::cpu_info_string());
  st->cr();
  pd_print_cpu_info(st, buf, buflen);
 }
--- a/src/hotspot/share/runtime/stubRoutines.cpp
+++ b/src/hotspot/share/runtime/stubRoutines.cpp
@ -101,8 +101,6 @@ jint    StubRoutines::_verify_oop_count                         = 0;


 address StubRoutines::_string_indexof_array[4]   =    { nullptr };
-address StubRoutines::_vector_f_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_VECTOR_OP_MATH] = {{nullptr}, {nullptr}};
-address StubRoutines::_vector_d_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_VECTOR_OP_MATH] = {{nullptr}, {nullptr}};

 const char* StubRoutines::get_blob_name(StubGenBlobId id) {
  assert(0 <= id && id < StubGenBlobId::NUM_BLOBIDS, "invalid blob id");
--- a/src/hotspot/share/runtime/stubRoutines.hpp
+++ b/src/hotspot/share/runtime/stubRoutines.hpp
@ -305,10 +305,6 @@ public:

  /* special case: stub employs array of entries */

-  // Vector Math Routines
-  static address _vector_f_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_VECTOR_OP_MATH];
-  static address _vector_d_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_VECTOR_OP_MATH];
-
  static bool is_stub_code(address addr)                   { return contains(addr); }

  // generate code to implement method contains
--- a/src/hotspot/share/runtime/vmStructs.cpp
+++ b/src/hotspot/share/runtime/vmStructs.cpp
@ -702,6 +702,7 @@
     static_field(Abstract_VM_Version,         _s_internal_vm_info_string,                    const char*)                           \
     static_field(Abstract_VM_Version,         _features,                                     uint64_t)                              \
     static_field(Abstract_VM_Version,         _features_string,                              const char*)                           \
+     static_field(Abstract_VM_Version,         _cpu_info_string,                              const char*)                           \
     static_field(Abstract_VM_Version,         _vm_major_version,                             int)                                   \
     static_field(Abstract_VM_Version,         _vm_minor_version,                             int)                                   \
     static_field(Abstract_VM_Version,         _vm_security_version,                          int)                                   \
--- a/src/java.base/share/classes/jdk/internal/vm/vector/Utils.java
+++ b/src/java.base/share/classes/jdk/internal/vm/vector/Utils.java
@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package jdk.internal.vm.vector;
+
+import jdk.internal.reflect.CallerSensitive;
+import jdk.internal.reflect.Reflection;
+
+/**
+ * Miscellaneous utility methods.
+ */
+public class Utils {
+    /**
+     * Debug-output switch, read once when this class is initialized from the
+     * system property {@code jdk.incubator.vector.DEBUG}
+     * (see {@link Boolean#getBoolean(String)}).
+     */
+    public static final boolean DEBUG = Boolean.getBoolean("jdk.incubator.vector.DEBUG");
+
+    /**
+     * Returns {@code true} if the runtime class of {@code o} declares no fields.
+     *
+     * NOTE(review): used as a "captures nothing" check; this relies on a
+     * capturing lambda holding its captured values in declared fields of its
+     * class - confirm.
+     *
+     * @param o the object (typically a lambda) to inspect; must not be null
+     * @return whether {@code o.getClass()} has zero declared fields
+     */
+    public static boolean isNonCapturingLambda(Object o) {
+        return o.getClass().getDeclaredFields().length == 0;
+    }
+
+    /**
+     * When {@link #DEBUG} is set, prints
+     * {@code "DEBUG: <caller simple class name>: "} followed by the formatted
+     * message and a line break to {@code System.out}; otherwise does nothing.
+     *
+     * @param format a {@code printf}-style format string
+     * @param args   arguments referenced by {@code format}
+     */
+    @CallerSensitive
+    public static void debug(String format, Object... args) {
+        if (DEBUG) {
+            // Identify the class that called debug() so it can prefix the message.
+            Class<?> caller = Reflection.getCallerClass();
+            System.out.printf("DEBUG: %s: ", caller.getSimpleName());
+            System.out.printf(format, args);
+            System.out.println();
+        }
+    }
+}
--- a/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java
+++ b/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java
@ -30,6 +30,8 @@ import jdk.internal.misc.Unsafe;

 import java.util.function.*;

+import static jdk.internal.vm.vector.Utils.isNonCapturingLambda;
+
 public class VectorSupport {
    static {
        registerNatives();
@ -114,6 +116,9 @@ public class VectorSupport {
    public static final int VECTOR_OP_EXPM1 = 117;
    public static final int VECTOR_OP_HYPOT = 118;

+    public static final int VECTOR_OP_MATHLIB_FIRST = VECTOR_OP_TAN;
+    public static final int VECTOR_OP_MATHLIB_LAST  = VECTOR_OP_HYPOT;
+
    public static final int VECTOR_OP_SADD  = 119;
    public static final int VECTOR_OP_SSUB  = 120;
    public static final int VECTOR_OP_SUADD = 121;
@ -323,6 +328,23 @@ public class VectorSupport {

    /* ============================================================================ */

+//    public interface LibraryUnaryOperation<V extends Vector<?>,
+//            M extends VectorMask<?>> {
+//        V apply(MemorySegment entry, V v, M m);
+//    }
+
+    /**
+     * Unary vector operation backed by a native library routine.
+     *
+     * When intrinsified by C2 ({@code LibraryCallKit::inline_vector_call(1)}),
+     * the call is replaced by a direct call to the native entry point
+     * {@code addr}. That requires {@code addr}, {@code vClass}, {@code eClass},
+     * {@code length} and {@code debugName} to be compile-time constants and
+     * {@code addr} to be non-zero.
+     *
+     * This Java body is the fallback: it ignores every argument except
+     * {@code v} and {@code defaultImpl}, and applies {@code defaultImpl} to
+     * {@code v} with a {@code null} mask.
+     *
+     * @param addr        address of the native routine (used only by the intrinsic)
+     * @param vClass      vector class of the operand and the result
+     * @param eClass      element type class
+     * @param length      number of vector lanes
+     * @param debugName   name attached to the generated call by the intrinsic
+     * @param v           the operand
+     * @param defaultImpl Java implementation of the operation; asserted to be
+     *                    a non-capturing lambda
+     * @return the result of the operation
+     */
+    @IntrinsicCandidate
+    public static
+    <V extends Vector<E>, E>
+    V libraryUnaryOp(long addr, Class<? extends V> vClass, Class<E> eClass, int length, String debugName,
+                     V v,
+                     UnaryOperation<V,?> defaultImpl) {
+        assert isNonCapturingLambda(defaultImpl) : defaultImpl;
+        return defaultImpl.apply(v, null);
+    }
+
+    /* ============================================================================ */
+
    public interface BinaryOperation<VM extends VectorPayload,
                                     M extends VectorMask<?>> {
        VM apply(VM v1, VM v2, M m);
@ -341,6 +363,24 @@ public class VectorSupport {
        assert isNonCapturingLambda(defaultImpl) : defaultImpl;
        return defaultImpl.apply(v1, v2, m);
    }
+
+    /* ============================================================================ */
+
+//    public interface LibraryBinaryOperation<V extends VectorPayload,
+//            M extends VectorMask<?>> {
+//        V apply(MemorySegment entry, V v1, V v2, M m);
+//    }
+
+    /**
+     * Binary vector operation backed by a native library routine.
+     *
+     * When intrinsified by C2 ({@code LibraryCallKit::inline_vector_call(2)}),
+     * the call is replaced by a direct call to the native entry point
+     * {@code addr}. That requires {@code addr}, {@code vClass}, {@code eClass},
+     * {@code length} and {@code debugName} to be compile-time constants and
+     * {@code addr} to be non-zero.
+     *
+     * This Java body is the fallback: it ignores every argument except
+     * {@code v1}, {@code v2} and {@code defaultImpl}, and applies
+     * {@code defaultImpl} to the two operands with a {@code null} mask.
+     *
+     * @param addr        address of the native routine (used only by the intrinsic)
+     * @param vClass      vector class of the operands and the result
+     * @param eClass      element type class
+     * @param length      number of vector lanes
+     * @param debugName   name attached to the generated call by the intrinsic
+     * @param v1          the first operand
+     * @param v2          the second operand
+     * @param defaultImpl Java implementation of the operation; asserted to be
+     *                    a non-capturing lambda
+     * @return the result of the operation
+     */
+    @IntrinsicCandidate
+    public static
+    <V extends VectorPayload, E>
+    V libraryBinaryOp(long addr, Class<? extends V> vClass, Class<E> eClass, int length, String debugName,
+                      V v1, V v2,
+                      BinaryOperation<V,?> defaultImpl) {
+        assert isNonCapturingLambda(defaultImpl) : defaultImpl;
+        return defaultImpl.apply(v1, v2, null);
+    }
+
    /* ============================================================================ */

    public interface SelectFromTwoVector<V extends Vector<?>> {
@ -718,13 +758,19 @@ public class VectorSupport {

    /* ============================================================================ */

+    // Returns a string containing the list of CPU features the VM detected.
+    public static native String getCPUFeatures();
+
+    /* ============================================================================ */
+
    // query the JVM's supported vector sizes and types
    public static native int getMaxLaneCount(Class<?> etype);

    /* ============================================================================ */

-    public static boolean isNonCapturingLambda(Object o) {
-        return o.getClass().getDeclaredFields().length == 0;
+    // Loads the native library with the given name by delegating to
+    // System.loadLibrary(name).
+    // NOTE(review): the "restricted" suppression presumably silences the
+    // compiler warning for calling System.loadLibrary as a restricted
+    // method - confirm.
+    @SuppressWarnings({"restricted"})
+    public static void loadNativeLibrary(String name) {
+        System.loadLibrary(name);
     }

    /* ============================================================================ */
--- a/src/java.base/share/classes/module-info.java
+++ b/src/java.base/share/classes/module-info.java
@ -272,6 +272,7 @@ module java.base {
        java.security.jgss,
        java.smartcardio,
        jdk.charsets,
+        jdk.incubator.vector,
        jdk.internal.vm.ci,
        jdk.jlink,
        jdk.jpackage,
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java
@ -689,7 +689,7 @@ public abstract class ByteVector extends AbstractVector<Byte> {
            if (op == ZOMO) {
                return blend(broadcast(-1), compare(NE, 0));
            }
-            if (op == NOT) {
+            else if (op == NOT) {
                return broadcast(-1).lanewise(XOR, this);
            }
        }
@ -717,7 +717,7 @@ public abstract class ByteVector extends AbstractVector<Byte> {
            if (op == ZOMO) {
                return blend(broadcast(-1), compare(NE, 0, m));
            }
-            if (op == NOT) {
+            else if (op == NOT) {
                return lanewise(XOR, broadcast(-1), m);
            }
        }
@ -728,6 +728,7 @@ public abstract class ByteVector extends AbstractVector<Byte> {
            UN_IMPL.find(op, opc, ByteVector::unaryOperations));
    }

+
    private static final
    ImplCache<Unary, UnaryOperation<ByteVector, VectorMask<Byte>>>
        UN_IMPL = new ImplCache<>(Unary.class, ByteVector.class);
@ -824,6 +825,7 @@ public abstract class ByteVector extends AbstractVector<Byte> {
                    = this.compare(EQ, (byte) 0, m);
                return this.blend(that, mask);
            }
+
            if (opKind(op, VO_SHIFT)) {
                // As per shift specification for Java, mask the shift count.
                // This allows the JIT to ignore some ISA details.
@ -850,6 +852,7 @@ public abstract class ByteVector extends AbstractVector<Byte> {
            BIN_IMPL.find(op, opc, ByteVector::binaryOperations));
    }

+
    private static final
    ImplCache<Binary, BinaryOperation<ByteVector, VectorMask<Byte>>>
        BIN_IMPL = new ImplCache<>(Binary.class, ByteVector.class);
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/CPUFeatures.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/CPUFeatures.java
@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package jdk.incubator.vector;
+
+import jdk.internal.vm.vector.VectorSupport;
+
+import java.util.Locale;
+import java.util.Set;
+
+import static jdk.incubator.vector.Util.requires;
+import static jdk.internal.util.Architecture.isX64;
+import static jdk.internal.vm.vector.Utils.debug;
+
+/**
+ * Enumerates CPU ISA extensions supported by the JVM on the current hardware.
+ *
+ * The feature list is read once, during class initialization, from
+ * {@code VectorSupport.getCPUFeatures()} and kept as a set of lower-cased names.
+ */
+/*package-private*/ class CPUFeatures {
+    /** Lower-cased CPU feature names reported by the JVM. */
+    private static final Set<String> features = getCPUFeatures();
+
+    /**
+     * Queries the JVM for its CPU feature string and splits it into individual names.
+     *
+     * @return an unmodifiable set of lower-cased feature names
+     */
+    private static Set<String> getCPUFeatures() {
+        String featuresString = VectorSupport.getCPUFeatures();
+        debug(featuresString);
+        // Locale.ROOT keeps lower-casing independent of the default locale.
+        String[] names = featuresString.toLowerCase(Locale.ROOT)
+                                       .split(",? "); // " " or ", " are used as a delimiter by JVM
+        assert validateFeatures(names);
+        return Set.of(names);
+    }
+
+    /**
+     * Sanity check used only from an {@code assert}: every name must be non-null and
+     * consist of lower-case letters, digits, '.' or '_'.
+     *
+     * @return always {@code true}, so the call can be wrapped in an {@code assert}
+     */
+    private static boolean validateFeatures(String[] features) {
+        for (String s : features) {
+            assert s != null && s.matches("[a-z0-9._]+") : String.format("Invalid CPU feature name: '%s'", s);
+        }
+        return true;
+    }
+
+    /** Case-insensitive membership test against the detected feature set. */
+    private static boolean hasFeature(String feature) {
+        return features.contains(feature.toLowerCase(Locale.ROOT));
+    }
+
+    /**
+     * x64-specific feature flags. Initializing this class on any other platform
+     * fails with {@link InternalError} (see the {@code requires} check below).
+     */
+    public static class X64 {
+        // Declared final: these are constants computed once at class initialization
+        // and must not be reassigned afterwards.
+        public static final boolean SUPPORTS_AVX      = hasFeature("avx");
+        public static final boolean SUPPORTS_AVX2     = hasFeature("avx2");
+        public static final boolean SUPPORTS_AVX512F  = hasFeature("avx512f");
+        public static final boolean SUPPORTS_AVX512DQ = hasFeature("avx512dq");
+
+        static {
+            requires(isX64(), "unsupported platform");
+
+            debug("AVX=%b; AVX2=%b; AVX512F=%b; AVX512DQ=%b",
+                  SUPPORTS_AVX, SUPPORTS_AVX2, SUPPORTS_AVX512F, SUPPORTS_AVX512DQ);
+
+            // Cross-check the reported features against the maximum vector sizes the JVM exposes.
+            assert SUPPORTS_AVX512F == (VectorShape.getMaxVectorBitSize(int.class)   == 512);
+            assert SUPPORTS_AVX2    == (VectorShape.getMaxVectorBitSize(byte.class)  >= 256);
+            assert SUPPORTS_AVX     == (VectorShape.getMaxVectorBitSize(float.class) >= 256);
+        }
+    }
+
+    /** Returns the unmodifiable set of lower-cased CPU feature names. */
+    public static Set<String> features() {
+        return features;
+    }
+}
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java
@ -678,6 +678,9 @@ public abstract class DoubleVector extends AbstractVector<Double> {
            if (op == ZOMO) {
                return blend(broadcast(-1), compare(NE, 0));
            }
+            else if (opKind(op, VO_MATHLIB)) {
+                return unaryMathOp(op);
+            }
        }
        int opc = opCode(op);
        return VectorSupport.unaryOp(
@ -703,6 +706,9 @@ public abstract class DoubleVector extends AbstractVector<Double> {
            if (op == ZOMO) {
                return blend(broadcast(-1), compare(NE, 0, m));
            }
+            else if (opKind(op, VO_MATHLIB)) {
+                return blend(unaryMathOp(op), m);
+            }
        }
        int opc = opCode(op);
        return VectorSupport.unaryOp(
@ -711,6 +717,13 @@ public abstract class DoubleVector extends AbstractVector<Double> {
            UN_IMPL.find(op, opc, DoubleVector::unaryOperations));
    }

+    /**
+     * Routes a unary math-library operator to {@code VectorMathLibrary.unaryMathOp},
+     * passing this vector's species and {@code DoubleVector::unaryOperations}
+     * as the supplier of the Java implementation.
+     */
+    @ForceInline
+    final
+    DoubleVector unaryMathOp(VectorOperators.Unary op) {
+        return VectorMathLibrary.unaryMathOp(op, opCode(op), species(), DoubleVector::unaryOperations,
+                                             this);
+    }
+
    private static final
    ImplCache<Unary, UnaryOperation<DoubleVector, VectorMask<Double>>>
        UN_IMPL = new ImplCache<>(Unary.class, DoubleVector.class);
@ -781,6 +794,9 @@ public abstract class DoubleVector extends AbstractVector<Double> {
                    = this.viewAsIntegralLanes().compare(EQ, (long) 0);
                return this.blend(that, mask.cast(vspecies()));
            }
+            else if (opKind(op, VO_MATHLIB)) {
+                return binaryMathOp(op, that);
+            }
        }

        int opc = opCode(op);
@ -815,6 +831,10 @@ public abstract class DoubleVector extends AbstractVector<Double> {
                    = bits.compare(EQ, (long) 0, m.cast(bits.vspecies()));
                return this.blend(that, mask.cast(vspecies()));
            }
+            else if (opKind(op, VO_MATHLIB)) {
+                return this.blend(binaryMathOp(op, that), m);
+            }
+
        }

        int opc = opCode(op);
@ -824,6 +844,13 @@ public abstract class DoubleVector extends AbstractVector<Double> {
            BIN_IMPL.find(op, opc, DoubleVector::binaryOperations));
    }

+    /**
+     * Routes a binary math-library operator to {@code VectorMathLibrary.binaryMathOp},
+     * passing this vector's species and {@code DoubleVector::binaryOperations}
+     * as the supplier of the Java implementation.
+     */
+    @ForceInline
+    final
+    DoubleVector binaryMathOp(VectorOperators.Binary op, DoubleVector that) {
+        return VectorMathLibrary.binaryMathOp(op, opCode(op), species(), DoubleVector::binaryOperations,
+                                              this, that);
+    }
+
    private static final
    ImplCache<Binary, BinaryOperation<DoubleVector, VectorMask<Double>>>
        BIN_IMPL = new ImplCache<>(Binary.class, DoubleVector.class);
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java
@ -678,6 +678,9 @@ public abstract class FloatVector extends AbstractVector<Float> {
            if (op == ZOMO) {
                return blend(broadcast(-1), compare(NE, 0));
            }
+            else if (opKind(op, VO_MATHLIB)) {
+                return unaryMathOp(op);
+            }
        }
        int opc = opCode(op);
        return VectorSupport.unaryOp(
@ -703,6 +706,9 @@ public abstract class FloatVector extends AbstractVector<Float> {
            if (op == ZOMO) {
                return blend(broadcast(-1), compare(NE, 0, m));
            }
+            else if (opKind(op, VO_MATHLIB)) {
+                return blend(unaryMathOp(op), m);
+            }
        }
        int opc = opCode(op);
        return VectorSupport.unaryOp(
@ -711,6 +717,13 @@ public abstract class FloatVector extends AbstractVector<Float> {
            UN_IMPL.find(op, opc, FloatVector::unaryOperations));
    }

+    /**
+     * Routes a unary math-library operator to {@code VectorMathLibrary.unaryMathOp},
+     * passing this vector's species and {@code FloatVector::unaryOperations}
+     * as the supplier of the Java implementation.
+     */
+    @ForceInline
+    final
+    FloatVector unaryMathOp(VectorOperators.Unary op) {
+        return VectorMathLibrary.unaryMathOp(op, opCode(op), species(), FloatVector::unaryOperations,
+                                             this);
+    }
+
    private static final
    ImplCache<Unary, UnaryOperation<FloatVector, VectorMask<Float>>>
        UN_IMPL = new ImplCache<>(Unary.class, FloatVector.class);
@ -781,6 +794,9 @@ public abstract class FloatVector extends AbstractVector<Float> {
                    = this.viewAsIntegralLanes().compare(EQ, (int) 0);
                return this.blend(that, mask.cast(vspecies()));
            }
+            else if (opKind(op, VO_MATHLIB)) {
+                return binaryMathOp(op, that);
+            }
        }

        int opc = opCode(op);
@ -815,6 +831,10 @@ public abstract class FloatVector extends AbstractVector<Float> {
                    = bits.compare(EQ, (int) 0, m.cast(bits.vspecies()));
                return this.blend(that, mask.cast(vspecies()));
            }
+            else if (opKind(op, VO_MATHLIB)) {
+                return this.blend(binaryMathOp(op, that), m);
+            }
+
        }

        int opc = opCode(op);
@ -824,6 +844,13 @@ public abstract class FloatVector extends AbstractVector<Float> {
            BIN_IMPL.find(op, opc, FloatVector::binaryOperations));
    }

+    /**
+     * Routes a binary math-library operator to {@code VectorMathLibrary.binaryMathOp},
+     * passing this vector's species and {@code FloatVector::binaryOperations}
+     * as the supplier of the Java implementation.
+     */
+    @ForceInline
+    final
+    FloatVector binaryMathOp(VectorOperators.Binary op, FloatVector that) {
+        return VectorMathLibrary.binaryMathOp(op, opCode(op), species(), FloatVector::binaryOperations,
+                                              this, that);
+    }
+
    private static final
    ImplCache<Binary, BinaryOperation<FloatVector, VectorMask<Float>>>
        BIN_IMPL = new ImplCache<>(Binary.class, FloatVector.class);
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java
@ -689,7 +689,7 @@ public abstract class IntVector extends AbstractVector<Integer> {
            if (op == ZOMO) {
                return blend(broadcast(-1), compare(NE, 0));
            }
-            if (op == NOT) {
+            else if (op == NOT) {
                return broadcast(-1).lanewise(XOR, this);
            }
        }
@ -717,7 +717,7 @@ public abstract class IntVector extends AbstractVector<Integer> {
            if (op == ZOMO) {
                return blend(broadcast(-1), compare(NE, 0, m));
            }
-            if (op == NOT) {
+            else if (op == NOT) {
                return lanewise(XOR, broadcast(-1), m);
            }
        }
@ -728,6 +728,7 @@ public abstract class IntVector extends AbstractVector<Integer> {
            UN_IMPL.find(op, opc, IntVector::unaryOperations));
    }

+
    private static final
    ImplCache<Unary, UnaryOperation<IntVector, VectorMask<Integer>>>
        UN_IMPL = new ImplCache<>(Unary.class, IntVector.class);
@ -824,6 +825,7 @@ public abstract class IntVector extends AbstractVector<Integer> {
                    = this.compare(EQ, (int) 0, m);
                return this.blend(that, mask);
            }
+
            if (opKind(op, VO_SHIFT)) {
                // As per shift specification for Java, mask the shift count.
                // This allows the JIT to ignore some ISA details.
@ -850,6 +852,7 @@ public abstract class IntVector extends AbstractVector<Integer> {
            BIN_IMPL.find(op, opc, IntVector::binaryOperations));
    }

+
    private static final
    ImplCache<Binary, BinaryOperation<IntVector, VectorMask<Integer>>>
        BIN_IMPL = new ImplCache<>(Binary.class, IntVector.class);
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java
@ -647,7 +647,7 @@ public abstract class LongVector extends AbstractVector<Long> {
            if (op == ZOMO) {
                return blend(broadcast(-1), compare(NE, 0));
            }
-            if (op == NOT) {
+            else if (op == NOT) {
                return broadcast(-1).lanewise(XOR, this);
            }
        }
@ -675,7 +675,7 @@ public abstract class LongVector extends AbstractVector<Long> {
            if (op == ZOMO) {
                return blend(broadcast(-1), compare(NE, 0, m));
            }
-            if (op == NOT) {
+            else if (op == NOT) {
                return lanewise(XOR, broadcast(-1), m);
            }
        }
@ -686,6 +686,7 @@ public abstract class LongVector extends AbstractVector<Long> {
            UN_IMPL.find(op, opc, LongVector::unaryOperations));
    }

+
    private static final
    ImplCache<Unary, UnaryOperation<LongVector, VectorMask<Long>>>
        UN_IMPL = new ImplCache<>(Unary.class, LongVector.class);
@ -782,6 +783,7 @@ public abstract class LongVector extends AbstractVector<Long> {
                    = this.compare(EQ, (long) 0, m);
                return this.blend(that, mask);
            }
+
            if (opKind(op, VO_SHIFT)) {
                // As per shift specification for Java, mask the shift count.
                // This allows the JIT to ignore some ISA details.
@ -808,6 +810,7 @@ public abstract class LongVector extends AbstractVector<Long> {
            BIN_IMPL.find(op, opc, LongVector::binaryOperations));
    }

+
    private static final
    ImplCache<Binary, BinaryOperation<LongVector, VectorMask<Long>>>
        BIN_IMPL = new ImplCache<>(Binary.class, LongVector.class);
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java
@ -689,7 +689,7 @@ public abstract class ShortVector extends AbstractVector<Short> {
            if (op == ZOMO) {
                return blend(broadcast(-1), compare(NE, 0));
            }
-            if (op == NOT) {
+            else if (op == NOT) {
                return broadcast(-1).lanewise(XOR, this);
            }
        }
@ -717,7 +717,7 @@ public abstract class ShortVector extends AbstractVector<Short> {
            if (op == ZOMO) {
                return blend(broadcast(-1), compare(NE, 0, m));
            }
-            if (op == NOT) {
+            else if (op == NOT) {
                return lanewise(XOR, broadcast(-1), m);
            }
        }
@ -728,6 +728,7 @@ public abstract class ShortVector extends AbstractVector<Short> {
            UN_IMPL.find(op, opc, ShortVector::unaryOperations));
    }

+
    private static final
    ImplCache<Unary, UnaryOperation<ShortVector, VectorMask<Short>>>
        UN_IMPL = new ImplCache<>(Unary.class, ShortVector.class);
@ -824,6 +825,7 @@ public abstract class ShortVector extends AbstractVector<Short> {
                    = this.compare(EQ, (short) 0, m);
                return this.blend(that, mask);
            }
+
            if (opKind(op, VO_SHIFT)) {
                // As per shift specification for Java, mask the shift count.
                // This allows the JIT to ignore some ISA details.
@ -850,6 +852,7 @@ public abstract class ShortVector extends AbstractVector<Short> {
            BIN_IMPL.find(op, opc, ShortVector::binaryOperations));
    }

+
    private static final
    ImplCache<Binary, BinaryOperation<ShortVector, VectorMask<Short>>>
        BIN_IMPL = new ImplCache<>(Binary.class, ShortVector.class);
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Util.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Util.java
@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package jdk.incubator.vector;
+
+/**
+ * Small package-private helpers for the vector API implementation.
+ */
+/*package-private*/ class Util {
+    /**
+     * Checks an internal precondition.
+     *
+     * @param cond    the condition that must hold
+     * @param message detail message of the error thrown when it does not
+     * @throws InternalError if {@code cond} is {@code false}
+     */
+    public static void requires(boolean cond, String message) {
+        if (cond) {
+            return; // precondition satisfied
+        }
+        throw new InternalError(message);
+    }
+}
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorMathLibrary.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorMathLibrary.java
@ -0,0 +1,323 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package jdk.incubator.vector;
+
+import jdk.internal.util.StaticProperty;
+import jdk.internal.vm.annotation.DontInline;
+import jdk.internal.vm.annotation.ForceInline;
+import jdk.internal.vm.annotation.Stable;
+import jdk.internal.vm.vector.VectorSupport;
+
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SymbolLookup;
+import java.util.function.IntFunction;
+
+import static jdk.incubator.vector.Util.requires;
+import static jdk.incubator.vector.VectorOperators.*;
+import static jdk.internal.util.Architecture.*;
+import static jdk.internal.vm.vector.Utils.debug;
+
+/**
+ * A wrapper for native vector math libraries bundled with the JDK (SVML and SLEEF).
+ * Binds vector operations to native implementations provided by the libraries.
+ */
+/*package-private*/ class VectorMathLibrary {
+    private static final SymbolLookup LOOKUP = SymbolLookup.loaderLookup();
+
+    /**
+     * Abstraction over a vector math library: maps an operator and a vector species
+     * to a native symbol name and reports whether that combination is supported.
+     */
+    interface Library {
+        /** Returns the name of the native symbol implementing {@code op} for {@code vspecies}. */
+        String symbolName(Operator op, VectorSpecies<?> vspecies);
+        /** Returns {@code true} if a native implementation should be bound for {@code op} and {@code vspecies}. */
+        boolean isSupported(Operator op, VectorSpecies<?> vspecies);
+
+        // Library names recognized by getInstance().
+        String SVML  = "svml";
+        String SLEEF = "sleef";
+        String JAVA  = "java";
+
+        /**
+         * Instantiates the library named by the {@code jdk.incubator.vector.VectorMathLibrary}
+         * system property, or by {@link #getDefaultName()} when the property is not set.
+         * Any failure (including an unrecognized name) is reported through {@code debug}
+         * and results in the {@code Java} library, which never selects a native implementation.
+         */
+        static Library getInstance() {
+            String libraryName = System.getProperty("jdk.incubator.vector.VectorMathLibrary", getDefaultName());
+            try {
+                return switch (libraryName) {
+                    case SVML  -> new SVML();
+                    case SLEEF -> new SLEEF();
+                    case JAVA  -> new Java();
+                    default    -> throw new IllegalArgumentException("Unsupported vector math library: " + libraryName);
+                };
+            } catch (Throwable e) {
+                // Throwable (not Exception) is caught on purpose: failures raised from the
+                // library classes' static initializers surface as Errors.
+                debug("Error during initialization of %s library: %s", libraryName, e);
+                return new Java(); // fallback
+            }
+        }
+
+        /** Picks the default library name from the OS architecture string. */
+        static String getDefaultName() {
+            return switch (StaticProperty.osArch()) {
+                case "amd64", "x86_64" -> SVML;
+                case "aarch64", "riscv64" -> SLEEF;
+                default -> JAVA;
+            };
+        }
+    }
+
+    // The library selected for this JVM instance; resolved once during class initialization.
+    private static final Library LIBRARY = Library.getInstance();
+
+    static {
+        // Note: CPUFeatures.features() is evaluated as a call argument, so the CPUFeatures
+        // class is initialized here regardless of what debug() does with the message.
+        debug("%s library is used (cpu features: %s)", LIBRARY.getClass().getSimpleName(), CPUFeatures.features());
+    }
+
+    /**
+     * Library that binds no native symbols: {@code isSupported} always answers
+     * {@code false}, so {@code symbolName} is never expected to be called.
+     */
+    private static class Java implements Library {
+        @Override
+        public String symbolName(Operator op, VectorSpecies<?> vspecies) {
+            throw new UnsupportedOperationException();
+        }
+
+        @Override
+        public boolean isSupported(Operator op, VectorSpecies<?> vspecies) {
+            return false; // always use default implementation
+        }
+    }
+
+    /**
+     * Naming convention in SVML vector math library.
+     * All the methods are named as __jsvml_<op><T><N>_ha_<VV> where:
+     *      ha stands for high accuracy
+     *      <T> is optional to indicate float/double
+     *              Set to f for vector float operation
+     *              Omitted for vector double operation
+     *      <N> is the number of elements in the vector
+     *              1, 2, 4, 8, 16
+     *              e.g. 128 bit float vector has 4 float elements
+     *      <VV> indicates the avx/sse level:
+     *              z0 is AVX512, l9 is AVX2, e9 is AVX1 and ex is for SSE2
+     *      e.g. __jsvml_expf16_ha_z0 is the method for computing 16 element vector float exp using AVX 512 insns
+     *           __jsvml_exp8_ha_z0 is the method for computing 8 element vector double exp using AVX 512 insns
+     */
+    private static class SVML implements Library {
+        static {
+            loadNativeLibrary();
+        }
+
+        /** Loads the {@code jsvml} native library; fails with {@link InternalError} when not running on x64. */
+        private static void loadNativeLibrary() {
+            requires(isX64(), "SVML library is x64-specific");
+            VectorSupport.loadNativeLibrary("jsvml");
+        }
+
+        /**
+         * Selects the ISA-level suffix of the symbol name: "z0" for 512-bit vectors,
+         * otherwise "l9", "e9" or "ex" depending on the AVX2/AVX support reported by
+         * {@code CPUFeatures.X64}.
+         */
+        private static String suffix(VectorSpecies<?> vspecies) {
+            assert vspecies.vectorBitSize() <= VectorShape.getMaxVectorBitSize(vspecies.elementType());
+
+            if (vspecies.vectorBitSize() == 512) {
+                assert CPUFeatures.X64.SUPPORTS_AVX512F;
+                return "z0";
+            } else if (CPUFeatures.X64.SUPPORTS_AVX2) {
+                return "l9";
+            } else if (CPUFeatures.X64.SUPPORTS_AVX) {
+                return "e9";
+            } else {
+                return "ex";
+            }
+        }
+
+        /**
+         * Builds the SVML symbol name for {@code op} on {@code vspecies} following the
+         * naming convention described on this class.
+         */
+        @Override
+        public String symbolName(Operator op, VectorSpecies<?> vspecies) {
+            String suffix = suffix(vspecies);
+            String elemType = (vspecies.elementType() == float.class ? "f" : "");
+            int vlen = (vspecies == FloatVector.SPECIES_64 ? 4 : vspecies.length()); // reuse 128-bit variant for 64-bit float vectors
+            // Use plain concatenation rather than String.format("%d"): the latter localizes
+            // digits according to the default locale, which can yield non-ASCII digits and
+            // therefore a symbol name that cannot be resolved in the native library.
+            return "__jsvml_" + op.operatorName() + elemType + vlen + "_ha_" + suffix;
+        }
+
+        /**
+         * Reports whether an SVML entry point should be bound for {@code op} on {@code vspecies}.
+         * Only float/double species that fit the maximum lane count reported by the JVM qualify;
+         * further operator-specific restrictions are noted inline.
+         */
+        @Override
+        public boolean isSupported(Operator op, VectorSpecies<?> vspecies) {
+            Class<?> etype = vspecies.elementType();
+            if (etype != float.class && etype != double.class) {
+                return false; // only FP types are supported
+            }
+            int maxLaneCount = VectorSupport.getMaxLaneCount(etype);
+            if (vspecies.length() > maxLaneCount) {
+                return false; // lacking vector support (either hardware or disabled on JVM side)
+            }
+            if (vspecies == DoubleVector.SPECIES_64) {
+                return false; // 64-bit double vectors are not supported
+            }
+            if (vspecies.vectorBitSize() == 512) {
+                if (op == LOG || op == LOG10 || op == POW) {
+                    return CPUFeatures.X64.SUPPORTS_AVX512DQ; // requires AVX512DQ CPU support
+                }
+            } else if (op == POW) {
+                return false; // not supported
+            }
+            return true;
+        }
+    }
+
+    /**
+     * Naming convention in SLEEF-based vector math library.
+     * All the methods are named as <OP><T><N>_<U><suffix> where:
+     *     <OP>     is the operation name, e.g. sin
+     *     <T>      is optional to indicate float/double
+     *              "f/d" for vector float/double operation
+     *     <N>      is the number of elements in the vector
+     *              "2/4" for neon, and "x" for sve/rvv
+     *     <U>      is the precision level
+     *              "u10/u05" represents 1.0/0.5 ULP error bounds
+     *               We use "u10" for all operations by default
+     *               But for those functions that do not have u10 support, we use "u05" instead
+     *     <suffix> indicates neon/sve/rvv
+     *              "sve/advsimd/rvv" for sve/neon/rvv implementations
+     *     e.g. sinfx_u10sve is the method for computing vector float sin using SVE instructions
+     *          cosd2_u10advsimd is the method for computing 2 elements vector double cos using NEON instructions
+     */
+    private static class SLEEF implements Library {
+        static {
+            // Fail during class initialization (where Library.getInstance() can fall back)
+            // rather than later from suffix(), mirroring the platform check done for SVML.
+            requires(isAARCH64() || isRISCV64(), "SLEEF library is AArch64/RISC-V-specific");
+            VectorSupport.loadNativeLibrary("sleef");
+        }
+
+        /**
+         * Returns the implementation suffix of the symbol name: "sve" or "advsimd" on AArch64
+         * (shape-agnostic or not), "rvv" on RISC-V.
+         */
+        private static String suffix(boolean isShapeAgnostic) {
+            if (isAARCH64()) {
+                if (isShapeAgnostic) {
+                    return "sve";
+                } else {
+                    return "advsimd";
+                }
+            } else if (isRISCV64()) {
+                assert isShapeAgnostic : "not supported";
+                return "rvv";
+            } else {
+                throw new InternalError("unsupported platform");
+            }
+        }
+
+        /** Returns the precision tag of the symbol name: "u05" for HYPOT, "u10" otherwise. */
+        private static String precisionLevel(Operator op) {
+            return (op == HYPOT ? "u05" : "u10");
+        }
+
+        /**
+         * Builds the SLEEF symbol name for {@code op} on {@code vspecies} following the
+         * naming convention described on this class.
+         */
+        @Override
+        public String symbolName(Operator op, VectorSpecies<?> vspecies) {
+            int vlen = (vspecies == FloatVector.SPECIES_64 ? 4 : vspecies.length()); // reuse 128-bit variant for 64-bit float vectors
+            boolean isShapeAgnostic = isRISCV64() || (isAARCH64() && vspecies.vectorBitSize() > 128);
+            return String.format("%s%s%s_%s%s", op.operatorName(),
+                                 (vspecies.elementType() == float.class ? "f" : "d"),
+                                 (isShapeAgnostic ? "x" : Integer.toString(vlen)),
+                                 precisionLevel(op),
+                                 suffix(isShapeAgnostic));
+        }
+
+        /**
+         * Reports whether a SLEEF entry point should be bound for {@code op} on {@code vspecies}.
+         * Only float/double species that fit the maximum lane count reported by the JVM qualify.
+         */
+        @Override
+        public boolean isSupported(Operator op, VectorSpecies<?> vspecies) {
+            Class<?> etype = vspecies.elementType();
+            if (etype != float.class && etype != double.class) {
+                return false; // only FP element types are supported
+            }
+            int maxLaneCount = VectorSupport.getMaxLaneCount(etype);
+            if (vspecies.length() > maxLaneCount) {
+                return false; // lacking vector support (either hardware or disabled on JVM side)
+            }
+            if (vspecies == DoubleVector.SPECIES_64) {
+                return false; // 64-bit double vectors are not supported
+            }
+            if (op == TANH) {
+                return false; // skip due to performance considerations
+            }
+            return true;
+        }
+    }
+
+    // Number of math-library opcodes (inclusive range FIRST..LAST).
+    private static final int SIZE = VectorSupport.VECTOR_OP_MATHLIB_LAST - VectorSupport.VECTOR_OP_MATHLIB_FIRST + 1;
+
+    // Cached binding: native symbol name (null if none), entry point (NULL if none) and Java implementation.
+    private record Entry<T> (String name, MemorySegment entry, T impl) {}
+
+    private static final @Stable Entry<?>[][][] LIBRARY_ENTRIES = new Entry<?>[SIZE][LaneType.SK_LIMIT][VectorShape.SK_LIMIT]; // OP x TYPE x SHAPE
+
+    /**
+     * Returns the cached entry for the given operator and species, constructing and
+     * storing it on first use.
+     *
+     * The cache is indexed by opcode (relative to VECTOR_OP_MATHLIB_FIRST), lane type
+     * switch key and vector shape switch key.
+     * No synchronization is used, so concurrent callers may each construct an entry for
+     * the same slot. NOTE(review): presumably acceptable because entries built for the
+     * same key are equivalent - confirm.
+     */
+    @ForceInline
+    private static <T> Entry<T> lookup(Operator op, int opc, VectorSpecies<?> vspecies, IntFunction<T> implSupplier) {
+        int idx = opc - VectorSupport.VECTOR_OP_MATHLIB_FIRST;
+        int elem_idx = ((AbstractSpecies<?>)vspecies).laneType.switchKey;
+        int shape_idx = vspecies.vectorShape().switchKey;
+        @SuppressWarnings({"unchecked"})
+        Entry<T> entry = (Entry<T>)LIBRARY_ENTRIES[idx][elem_idx][shape_idx];
+        if (entry == null) {
+            // Slow path: resolve the native symbol (if any) and publish the entry.
+            entry = constructEntry(op, opc, vspecies, implSupplier);
+            LIBRARY_ENTRIES[idx][elem_idx][shape_idx] = entry;
+        }
+        return entry;
+    }
+
+    /**
+     * Creates the cache entry for {@code op} on {@code vspecies}.
+     *
+     * When the selected library does not support the combination, the entry carries no
+     * symbol name and a NULL entry point. Otherwise the symbol is resolved through the
+     * loader lookup; a {@link RuntimeException} raised while doing so is rethrown as
+     * {@link InternalError}.
+     */
+    @DontInline
+    private static
+    <E,T>
+    Entry<T> constructEntry(Operator op, int opc, VectorSpecies<E> vspecies, IntFunction<T> implSupplier) {
+        if (!LIBRARY.isSupported(op, vspecies)) {
+            // No native binding for this combination: only the Java implementation is recorded.
+            return new Entry<>(null, MemorySegment.NULL, implSupplier.apply(opc));
+        }
+        String symbol = LIBRARY.symbolName(op, vspecies);
+        try {
+            MemorySegment addr = LOOKUP.findOrThrow(symbol);
+            debug("%s %s => 0x%016x\n", op, symbol, addr.address());
+            T impl = implSupplier.apply(opc); // TODO: should call the very same native implementation eventually (once FFM API supports vectors)
+            return new Entry<>(symbol, addr, impl);
+        } catch (RuntimeException e) {
+            throw new InternalError("not supported: " + op + " " + vspecies + " " + symbol, e);
+        }
+    }
+
+    /**
+     * Applies the unary math operator {@code op} to {@code v}.
+     *
+     * If a native entry point is bound for the operator and species, the call goes through
+     * {@code VectorSupport.libraryUnaryOp} with the Java implementation passed along;
+     * otherwise the Java implementation is applied directly with a {@code null} mask.
+     */
+    @ForceInline
+    /*package-private*/ static
+    <E, V extends Vector<E>>
+    V unaryMathOp(Unary op, int opc, VectorSpecies<E> vspecies,
+                  IntFunction<VectorSupport.UnaryOperation<V,?>> implSupplier,
+                  V v) {
+        var bound = lookup(op, opc, vspecies, implSupplier);
+
+        long nativeAddress = bound.entry.address();
+        if (nativeAddress == 0) {
+            // No native symbol bound: use the Java implementation.
+            return bound.impl.apply(v, null);
+        }
+        @SuppressWarnings({"unchecked"})
+        Class<V> vt = (Class<V>)vspecies.vectorType();
+        return VectorSupport.libraryUnaryOp(
+                nativeAddress, vt, vspecies.elementType(), vspecies.length(), bound.name,
+                v,
+                bound.impl);
+    }
+
+    /**
+     * Applies the binary math operator {@code op} to {@code v1} and {@code v2}.
+     *
+     * If a native entry point is bound for the operator and species, the call goes through
+     * {@code VectorSupport.libraryBinaryOp} with the Java implementation passed along;
+     * otherwise the Java implementation is applied directly with a {@code null} mask.
+     */
+    @ForceInline
+    /*package-private*/ static
+    <E, V extends Vector<E>>
+    V binaryMathOp(Binary op, int opc, VectorSpecies<E> vspecies,
+                   IntFunction<VectorSupport.BinaryOperation<V,?>> implSupplier,
+                   V v1, V v2) {
+        var bound = lookup(op, opc, vspecies, implSupplier);
+
+        long nativeAddress = bound.entry.address();
+        if (nativeAddress == 0) {
+            // No native symbol bound: use the Java implementation.
+            return bound.impl.apply(v1, v2, null);
+        }
+        @SuppressWarnings({"unchecked"})
+        Class<V> vt = (Class<V>)vspecies.vectorType();
+        return VectorSupport.libraryBinaryOp(
+                nativeAddress, vt, vspecies.elementType(), vspecies.length(), bound.name,
+                v1, v2,
+                bound.impl);
+    }
+}
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorOperators.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorOperators.java
@ -33,6 +33,8 @@ import jdk.internal.vm.annotation.Stable;

 import jdk.internal.vm.vector.VectorSupport;

+import static jdk.internal.vm.vector.Utils.isNonCapturingLambda;
+
 /**
 * This class consists solely of static constants
 * that describe lane-wise vector operations, plus nested interfaces
@ -426,6 +428,7 @@ public abstract class VectorOperators {
        VO_SPECIAL                 = 0x080, // random special handling
        VO_NOFP                    = 0x100,
        VO_ONLYFP                  = 0x200,
+        VO_MATHLIB                 = 0x400,
        VO_OPCODE_VALID            = 0x800,
        VO_OPCODE_SHIFT            = 12,
        VO_OPCODE_LIMIT            = 0x400,
@ -476,67 +479,67 @@ public abstract class VectorOperators {
    /** Produce {@code sin(a)}.  Floating only.
     *  Not guaranteed to be semi-monotonic. See section "Operations on floating point vectors" above
     */
-    public static final /*float*/ Unary SIN = unary("SIN", "sin", VectorSupport.VECTOR_OP_SIN, VO_ONLYFP);
+    public static final /*float*/ Unary SIN = unary("SIN", "sin", VectorSupport.VECTOR_OP_SIN, VO_ONLYFP | VO_SPECIAL | VO_MATHLIB);
    /** Produce {@code cos(a)}.  Floating only.
     *  Not guaranteed to be semi-monotonic. See section "Operations on floating point vectors" above
     */
-    public static final /*float*/ Unary COS = unary("COS", "cos", VectorSupport.VECTOR_OP_COS, VO_ONLYFP);
+    public static final /*float*/ Unary COS = unary("COS", "cos", VectorSupport.VECTOR_OP_COS, VO_ONLYFP | VO_SPECIAL | VO_MATHLIB);
    /** Produce {@code tan(a)}.  Floating only.
     *  Not guaranteed to be semi-monotonic. See section "Operations on floating point vectors" above
     */
-    public static final /*float*/ Unary TAN = unary("TAN", "tan", VectorSupport.VECTOR_OP_TAN, VO_ONLYFP);
+    public static final /*float*/ Unary TAN = unary("TAN", "tan", VectorSupport.VECTOR_OP_TAN, VO_ONLYFP | VO_SPECIAL | VO_MATHLIB);
    /** Produce {@code asin(a)}.  Floating only.
     *  Not guaranteed to be semi-monotonic. See section "Operations on floating point vectors" above
     */
-    public static final /*float*/ Unary ASIN = unary("ASIN", "asin", VectorSupport.VECTOR_OP_ASIN, VO_ONLYFP);
+    public static final /*float*/ Unary ASIN = unary("ASIN", "asin", VectorSupport.VECTOR_OP_ASIN, VO_ONLYFP | VO_SPECIAL | VO_MATHLIB);
    /** Produce {@code acos(a)}.  Floating only.
     *  Not guaranteed to be semi-monotonic. See section "Operations on floating point vectors" above
     */
-    public static final /*float*/ Unary ACOS = unary("ACOS", "acos", VectorSupport.VECTOR_OP_ACOS, VO_ONLYFP);
+    public static final /*float*/ Unary ACOS = unary("ACOS", "acos", VectorSupport.VECTOR_OP_ACOS, VO_ONLYFP | VO_SPECIAL | VO_MATHLIB);
    /** Produce {@code atan(a)}.  Floating only.
     *  Not guaranteed to be semi-monotonic. See section "Operations on floating point vectors" above
     */
-    public static final /*float*/ Unary ATAN = unary("ATAN", "atan", VectorSupport.VECTOR_OP_ATAN, VO_ONLYFP);
+    public static final /*float*/ Unary ATAN = unary("ATAN", "atan", VectorSupport.VECTOR_OP_ATAN, VO_ONLYFP | VO_SPECIAL | VO_MATHLIB);

    /** Produce {@code exp(a)}.  Floating only.
     *  Not guaranteed to be semi-monotonic. See section "Operations on floating point vectors" above
     */
-    public static final /*float*/ Unary EXP = unary("EXP", "exp", VectorSupport.VECTOR_OP_EXP, VO_ONLYFP);
+    public static final /*float*/ Unary EXP = unary("EXP", "exp", VectorSupport.VECTOR_OP_EXP, VO_ONLYFP | VO_SPECIAL | VO_MATHLIB);
    /** Produce {@code log(a)}.  Floating only.
     *  Not guaranteed to be semi-monotonic. See section "Operations on floating point vectors" above
     */
-    public static final /*float*/ Unary LOG = unary("LOG", "log", VectorSupport.VECTOR_OP_LOG, VO_ONLYFP);
+    public static final /*float*/ Unary LOG = unary("LOG", "log", VectorSupport.VECTOR_OP_LOG, VO_ONLYFP | VO_SPECIAL | VO_MATHLIB);
    /** Produce {@code log10(a)}.  Floating only.
     *  Not guaranteed to be semi-monotonic. See section "Operations on floating point vectors" above
     */
-    public static final /*float*/ Unary LOG10 = unary("LOG10", "log10", VectorSupport.VECTOR_OP_LOG10, VO_ONLYFP);
+    public static final /*float*/ Unary LOG10 = unary("LOG10", "log10", VectorSupport.VECTOR_OP_LOG10, VO_ONLYFP | VO_SPECIAL | VO_MATHLIB);
    /** Produce {@code sqrt(a)}.  Floating only.  See section "Operations on floating point vectors" above */
    public static final /*float*/ Unary SQRT = unary("SQRT", "sqrt", VectorSupport.VECTOR_OP_SQRT, VO_ONLYFP);
    /** Produce {@code cbrt(a)}.  Floating only.
     *  Not guaranteed to be semi-monotonic. See section "Operations on floating point vectors" above
     */
-    public static final /*float*/ Unary CBRT = unary("CBRT", "cbrt", VectorSupport.VECTOR_OP_CBRT, VO_ONLYFP);
+    public static final /*float*/ Unary CBRT = unary("CBRT", "cbrt", VectorSupport.VECTOR_OP_CBRT, VO_ONLYFP | VO_SPECIAL | VO_MATHLIB);

    /** Produce {@code sinh(a)}.  Floating only.
     *  Not guaranteed to be semi-monotonic. See section "Operations on floating point vectors" above
     */
-    public static final /*float*/ Unary SINH = unary("SINH", "sinh", VectorSupport.VECTOR_OP_SINH, VO_ONLYFP);
+    public static final /*float*/ Unary SINH = unary("SINH", "sinh", VectorSupport.VECTOR_OP_SINH, VO_ONLYFP | VO_SPECIAL | VO_MATHLIB);
    /** Produce {@code cosh(a)}.  Floating only.
     *  Not guaranteed to be semi-monotonic. See section "Operations on floating point vectors" above
     */
-    public static final /*float*/ Unary COSH = unary("COSH", "cosh", VectorSupport.VECTOR_OP_COSH, VO_ONLYFP);
+    public static final /*float*/ Unary COSH = unary("COSH", "cosh", VectorSupport.VECTOR_OP_COSH, VO_ONLYFP | VO_SPECIAL | VO_MATHLIB);
    /** Produce {@code tanh(a)}.  Floating only.
     *  Not guaranteed to be semi-monotonic. See section "Operations on floating point vectors" above
     */
-    public static final /*float*/ Unary TANH = unary("TANH", "tanh", VectorSupport.VECTOR_OP_TANH, VO_ONLYFP);
+    public static final /*float*/ Unary TANH = unary("TANH", "tanh", VectorSupport.VECTOR_OP_TANH, VO_ONLYFP | VO_SPECIAL | VO_MATHLIB);
    /** Produce {@code expm1(a)}.  Floating only.
     *  Not guaranteed to be semi-monotonic. See section "Operations on floating point vectors" above
     */
-    public static final /*float*/ Unary EXPM1 = unary("EXPM1", "expm1", VectorSupport.VECTOR_OP_EXPM1, VO_ONLYFP);
+    public static final /*float*/ Unary EXPM1 = unary("EXPM1", "expm1", VectorSupport.VECTOR_OP_EXPM1, VO_ONLYFP | VO_SPECIAL | VO_MATHLIB);
    /** Produce {@code log1p(a)}.  Floating only.
     *  Not guaranteed to be semi-monotonic. See section "Operations on floating point vectors" above
     */
-    public static final /*float*/ Unary LOG1P = unary("LOG1P", "log1p", VectorSupport.VECTOR_OP_LOG1P, VO_ONLYFP);
+    public static final /*float*/ Unary LOG1P = unary("LOG1P", "log1p", VectorSupport.VECTOR_OP_LOG1P, VO_ONLYFP | VO_SPECIAL | VO_MATHLIB);

    // Binary operators

@ -615,15 +618,15 @@ public abstract class VectorOperators {
    /** Produce {@code atan2(a,b)}. See  Floating only.
     *  Not guaranteed to be semi-monotonic. See section "Operations on floating point vectors" above
     */
-    public static final /*float*/ Binary ATAN2 = binary("ATAN2", "atan2", VectorSupport.VECTOR_OP_ATAN2, VO_ONLYFP);
+    public static final /*float*/ Binary ATAN2 = binary("ATAN2", "atan2", VectorSupport.VECTOR_OP_ATAN2, VO_ONLYFP | VO_SPECIAL | VO_MATHLIB);
    /** Produce {@code pow(a,b)}.  Floating only.
     *  Not guaranteed to be semi-monotonic. See section "Operations on floating point vectors" above
     */
-    public static final /*float*/ Binary POW = binary("POW", "pow", VectorSupport.VECTOR_OP_POW, VO_ONLYFP);
+    public static final /*float*/ Binary POW = binary("POW", "pow", VectorSupport.VECTOR_OP_POW, VO_ONLYFP | VO_SPECIAL | VO_MATHLIB);
    /** Produce {@code hypot(a,b)}.  Floating only.
     *  Not guaranteed to be semi-monotonic. See section "Operations on floating point vectors" above
     */
-    public static final /*float*/ Binary HYPOT = binary("HYPOT", "hypot", VectorSupport.VECTOR_OP_HYPOT, VO_ONLYFP);
+    public static final /*float*/ Binary HYPOT = binary("HYPOT", "hypot", VectorSupport.VECTOR_OP_HYPOT, VO_ONLYFP | VO_SPECIAL | VO_MATHLIB);

    // Ternary operators

@ -1373,7 +1376,7 @@ public abstract class VectorOperators {
            if (fn != null)  return fn;
            fn = supplier.apply(opc);
            if (fn == null)  throw badOp(op);
-            assert(VectorSupport.isNonCapturingLambda(fn)) : fn;
+            assert(isNonCapturingLambda(fn)) : fn;
            // The JIT can see into this cache:
            cache[opc] = fn;
            return fn;
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template
@ -712,10 +712,15 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
                return blend(broadcast(-1), compare(NE, 0));
            }
 #if[BITWISE]
-            if (op == NOT) {
+            else if (op == NOT) {
                return broadcast(-1).lanewise(XOR, this);
            }
 #end[BITWISE]
+#if[FP]
+            else if (opKind(op, VO_MATHLIB)) {
+                return unaryMathOp(op);
+            }
+#end[FP]
        }
        int opc = opCode(op);
        return VectorSupport.unaryOp(
@ -742,10 +747,15 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
                return blend(broadcast(-1), compare(NE, 0, m));
            }
 #if[BITWISE]
-            if (op == NOT) {
+            else if (op == NOT) {
                return lanewise(XOR, broadcast(-1), m);
            }
 #end[BITWISE]
+#if[FP]
+            else if (opKind(op, VO_MATHLIB)) {
+                return blend(unaryMathOp(op), m);
+            }
+#end[FP]
        }
        int opc = opCode(op);
        return VectorSupport.unaryOp(
@ -754,6 +764,15 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
            UN_IMPL.find(op, opc, $abstractvectortype$::unaryOperations));
    }

+#if[FP]
+    /**
+     * Hands a unary operator over to {@code VectorMathLibrary.unaryMathOp},
+     * passing the operator's opcode, this vector's species, the
+     * {@code unaryOperations} implementation supplier and this vector
+     * as the operand.
+     */
+    @ForceInline
+    final
+    $abstractvectortype$ unaryMathOp(VectorOperators.Unary op) {
+        int opc = opCode(op);
+        return VectorMathLibrary.unaryMathOp(
+                op, opc, species(),
+                $abstractvectortype$::unaryOperations,
+                this);
+    }
+#end[FP]
+
    private static final
    ImplCache<Unary, UnaryOperation<$abstractvectortype$, VectorMask<$Boxtype$>>>
        UN_IMPL = new ImplCache<>(Unary.class, $Type$Vector.class);
@ -856,6 +875,11 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
                    = this{#if[FP]?.viewAsIntegralLanes()}.compare(EQ, ($bitstype$) 0);
                return this.blend(that, mask{#if[FP]?.cast(vspecies())});
            }
+#if[FP]
+            else if (opKind(op, VO_MATHLIB)) {
+                return binaryMathOp(op, that);
+            }
+#end[FP]
 #if[BITWISE]
 #if[!FP]
            if (opKind(op, VO_SHIFT)) {
@ -915,6 +939,12 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
                return this.blend(that, mask);
 #end[FP]
            }
+#if[FP]
+            else if (opKind(op, VO_MATHLIB)) {
+                return this.blend(binaryMathOp(op, that), m);
+            }
+#end[FP]
+
 #if[BITWISE]
 #if[!FP]
            if (opKind(op, VO_SHIFT)) {
@ -945,6 +975,15 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
            BIN_IMPL.find(op, opc, $abstractvectortype$::binaryOperations));
    }

+#if[FP]
+    /**
+     * Hands a binary operator over to {@code VectorMathLibrary.binaryMathOp},
+     * passing the operator's opcode, this vector's species, the
+     * {@code binaryOperations} implementation supplier, and this vector
+     * and {@code that} as the two operands.
+     */
+    @ForceInline
+    final
+    $abstractvectortype$ binaryMathOp(VectorOperators.Binary op, $abstractvectortype$ that) {
+        int opc = opCode(op);
+        return VectorMathLibrary.binaryMathOp(
+                op, opc, species(),
+                $abstractvectortype$::binaryOperations,
+                this, that);
+    }
+#end[FP]
+
    private static final
    ImplCache<Binary, BinaryOperation<$abstractvectortype$, VectorMask<$Boxtype$>>>
        BIN_IMPL = new ImplCache<>(Binary.class, $Type$Vector.class);