From 4f56990979dfa90a1a01599dc391fa9d09993eaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20H=C3=A4ssig?= Date: Mon, 15 Dec 2025 07:27:58 +0000 Subject: [PATCH] 8364465: Enhance behavior of some intrinsics Co-authored-by: Matthias Baesken Co-authored-by: Martin Doerr Co-authored-by: Dean Long Co-authored-by: Jatin Bhateja Co-authored-by: Hannes Greule Reviewed-by: ahgross, thartmann, dlong, rhalade --- src/hotspot/cpu/aarch64/aarch64.ad | 3 +- .../cpu/aarch64/macroAssembler_aarch64.cpp | 11 +++++++ .../cpu/aarch64/macroAssembler_aarch64.hpp | 4 +++ src/hotspot/cpu/arm/arm.ad | 8 +++-- src/hotspot/cpu/ppc/ppc.ad | 33 ++++++++++--------- src/hotspot/cpu/x86/macroAssembler_x86.cpp | 12 +++++++ src/hotspot/cpu/x86/macroAssembler_x86.hpp | 3 ++ src/hotspot/cpu/x86/x86.ad | 13 +++++--- 8 files changed, 63 insertions(+), 24 deletions(-) diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index 53fa4e3066c..185d5b72013 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -7708,10 +7708,11 @@ instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{ match(Set dst (ReverseBytesUS src)); ins_cost(INSN_COST); - format %{ "rev16w $dst, $src" %} + format %{ "rev16w $dst, $src\t# $dst -> unsigned short" %} ins_encode %{ __ rev16w(as_Register($dst$$reg), as_Register($src$$reg)); + __ narrow_subword_type(as_Register($dst$$reg), T_CHAR); %} ins_pipe(ialu_reg); diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp index 7bec0a3c0ca..40f7251600a 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -2815,6 +2815,17 @@ void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in } } +void MacroAssembler::narrow_subword_type(Register reg, BasicType bt) { + assert(is_subword_type(bt), "required"); + switch (bt) { + case T_BOOLEAN: andw(reg, reg, 1); break; + case T_BYTE: sxtbw(reg, reg); break; + case T_CHAR: uxthw(reg, reg); break; + case T_SHORT: sxthw(reg, reg); break; + default: ShouldNotReachHere(); + } +} + void MacroAssembler::decrementw(Register reg, int value) { if (value < 0) { incrementw(reg, -value); return; } diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp index a6cc862d05c..e5e36d43516 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp @@ -33,6 +33,7 @@ #include "oops/compressedOops.hpp" #include "oops/compressedKlass.hpp" #include "runtime/vm_version.hpp" +#include "utilities/globalDefinitions.hpp" #include "utilities/powerOfTwo.hpp" class OopMap; @@ -719,6 +720,9 @@ public: // Support for sign-extension (hi:lo = extend_sign(lo)) void extend_sign(Register hi, Register lo); + // Clean up a subword typed value to the representation in compliance with JVMS §2.3 + void narrow_subword_type(Register reg, BasicType bt); + // Load and store values by size and signed-ness void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed); void store_sized_value(Address dst, Register src, size_t size_in_bytes); diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad index 60a0ef307b5..45ae283e05a 100644 --- a/src/hotspot/cpu/arm/arm.ad +++ b/src/hotspot/cpu/arm/arm.ad @@ -9214,10 +9214,12 @@ instruct bytes_reverse_long(iRegL dst, iRegL src) %{ instruct bytes_reverse_unsigned_short(iRegI dst, iRegI src) %{ match(Set dst (ReverseBytesUS src)); - size(4); - format %{ "REV16 $dst,$src" %} + size(8); + format %{ "REV32 $dst,$src\n\t" + "LSR $dst,$dst,#16" %} ins_encode %{ - __ rev16($dst$$Register, $src$$Register); + __ rev($dst$$Register, $src$$Register); + __ mov($dst$$Register, AsmOperand($dst$$Register, lsr, 16)); %} ins_pipe( iload_mem ); // FIXME %} diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad index f3d33b4305d..33747538e00 100644 --- a/src/hotspot/cpu/ppc/ppc.ad +++ b/src/hotspot/cpu/ppc/ppc.ad @@ -12480,6 +12480,19 @@ instruct countTrailingZerosL_cnttzd(iRegIdst dst, iRegLsrc src) %{ ins_pipe(pipe_class_default); %} +// Expand nodes for byte_reverse_int/ushort/short. +instruct rlwinm(iRegIdst dst, iRegIsrc src, immI16 shift, immI16 mb, immI16 me) %{ + effect(DEF dst, USE src, USE shift, USE mb, USE me); + predicate(false); + + format %{ "RLWINM $dst, $src, $shift, $mb, $me" %} + size(4); + ins_encode %{ + __ rlwinm($dst$$Register, $src$$Register, $shift$$constant, $mb$$constant, $me$$constant); + %} + ins_pipe(pipe_class_default); +%} + // Expand nodes for byte_reverse_int. instruct insrwi_a(iRegIdst dst, iRegIsrc src, immI16 n, immI16 b) %{ effect(DEF dst, USE src, USE n, USE b); @@ -12636,34 +12649,22 @@ instruct bytes_reverse_long(iRegLdst dst, iRegLsrc src) %{ ins_pipe(pipe_class_default); %} +// Need zero extend. Must not use brh only. instruct bytes_reverse_ushort_Ex(iRegIdst dst, iRegIsrc src) %{ match(Set dst (ReverseBytesUS src)); - predicate(!UseByteReverseInstructions); ins_cost(2*DEFAULT_COST); expand %{ + immI16 imm31 %{ (int) 31 %} + immI16 imm24 %{ (int) 24 %} immI16 imm16 %{ (int) 16 %} immI16 imm8 %{ (int) 8 %} - urShiftI_reg_imm(dst, src, imm8); + rlwinm(dst, src, imm24, imm24, imm31); insrwi(dst, src, imm8, imm16); %} %} -instruct bytes_reverse_ushort(iRegIdst dst, iRegIsrc src) %{ - match(Set dst (ReverseBytesUS src)); - predicate(UseByteReverseInstructions); - ins_cost(DEFAULT_COST); - size(4); - - format %{ "BRH $dst, $src" %} - - ins_encode %{ - __ brh($dst$$Register, $src$$Register); - %} - ins_pipe(pipe_class_default); -%} - instruct bytes_reverse_short_Ex(iRegIdst dst, iRegIsrc src) %{ match(Set dst (ReverseBytesS src)); predicate(!UseByteReverseInstructions); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index 5ab3ca339aa..4c851377ce5 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -55,6 +55,7 @@ #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" #include "utilities/checkedCast.hpp" +#include "utilities/globalDefinitions.hpp" #include "utilities/macros.hpp" #ifdef PRODUCT @@ -2540,6 +2541,17 @@ void MacroAssembler::sign_extend_short(Register reg) { movswl(reg, reg); // movsxw } +void MacroAssembler::narrow_subword_type(Register reg, BasicType bt) { + assert(is_subword_type(bt), "required"); + switch (bt) { + case T_BOOLEAN: andl(reg, 1); break; + case T_BYTE: movsbl(reg, reg); break; + case T_CHAR: movzwl(reg, reg); break; + case T_SHORT: movswl(reg, reg); break; + default: ShouldNotReachHere(); + } +} + void MacroAssembler::testl(Address dst, int32_t imm32) { if (imm32 >= 0 && is8bit(imm32)) { testb(dst, imm32); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index 021d2943ee8..b73339c217f 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -444,6 +444,9 @@ class MacroAssembler: public Assembler { void sign_extend_short(Register reg); void sign_extend_byte(Register reg); + // Clean up a subword typed value to the representation in compliance with JVMS §2.3 + void narrow_subword_type(Register reg, BasicType bt); + // Division by power of 2, rounding towards 0 void division_with_shift(Register reg, int shift_value); diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index 9b80697601c..dee5a9b7d34 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -10899,10 +10899,11 @@ instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{ predicate(!n->as_LoadStore()->result_not_used()); match(Set newval (GetAndAddB mem newval)); effect(KILL cr); - format %{ "xaddb_lock $mem, $newval" %} + format %{ "xaddb_lock $mem, $newval\t# $newval -> byte" %} ins_encode %{ __ lock(); __ xaddb($mem$$Address, $newval$$Register); + __ narrow_subword_type($newval$$Register, T_BYTE); %} ins_pipe(pipe_cmpxchg); %} @@ -10935,10 +10936,11 @@ instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{ predicate(!n->as_LoadStore()->result_not_used()); match(Set newval (GetAndAddS mem newval)); effect(KILL cr); - format %{ "xaddw_lock $mem, $newval" %} + format %{ "xaddw_lock $mem, $newval\t# $newval -> short" %} ins_encode %{ __ lock(); __ xaddw($mem$$Address, $newval$$Register); + __ narrow_subword_type($newval$$Register, T_SHORT); %} ins_pipe(pipe_cmpxchg); %} @@ -11017,18 +11019,20 @@ instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{ instruct xchgB( memory mem, rRegI newval) %{ match(Set newval (GetAndSetB mem newval)); - format %{ "XCHGB $newval,[$mem]" %} + format %{ "XCHGB $newval,[$mem]\t# $newval -> byte" %} ins_encode %{ __ xchgb($newval$$Register, $mem$$Address); + __ narrow_subword_type($newval$$Register, T_BYTE); %} ins_pipe( pipe_cmpxchg ); %} instruct xchgS( memory mem, rRegI newval) %{ match(Set newval (GetAndSetS mem newval)); - format %{ "XCHGW $newval,[$mem]" %} + format %{ "XCHGW $newval,[$mem]\t# $newval -> short" %} ins_encode %{ __ xchgw($newval$$Register, $mem$$Address); + __ narrow_subword_type($newval$$Register, T_SHORT); %} ins_pipe( pipe_cmpxchg ); %} @@ -25317,6 +25321,7 @@ instruct reinterpretHF2S(rRegI dst, regF src) format %{ "evmovw $dst, $src" %} ins_encode %{ __ evmovw($dst$$Register, $src$$XMMRegister); + __ narrow_subword_type($dst$$Register, T_SHORT); %} ins_pipe(pipe_slow); %}