jdk/src/hotspot/cpu/x86/stubGenerator_x86_64_tanh.cpp
Vladimir Kozlov 2a8e3b8e66 8382174: Clarify the meaning of address cast in ADD() macro
Reviewed-by: aseoane, adinn, asmehra, dfenacci
2026-04-15 19:02:58 +00:00

537 lines
25 KiB
C++

/*
* Copyright (c) 2024, 2025, Intel Corporation. All rights reserved.
* Intel Math Library (LIBM) Source Code
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "macroAssembler_x86.hpp"
#include "stubGenerator_x86_64.hpp"
/******************************************************************************/
// ALGORITHM DESCRIPTION
// ---------------------
//
// tanh(x)=(exp(x)-exp(-x))/(exp(x)+exp(-x))=(1-exp(-2*x))/(1+exp(-2*x))
//
// Let |x|=xH+xL (upper 26 bits, lower 27 bits)
// log2(e) rounded to 26 bits (high part) plus a double precision low part is
// L2EH+L2EL (upper 26, lower 53 bits)
//
// Let xH*L2EH=k+f+r`, where (k+f)*2^8*2=int(xH*L2EH*2^9),
// f=0.b1 b2 ... b8, k integer
// 2^{-f} is approximated as Tn[f]+Dn[f]
// Tn stores the high 53 bits, Dn stores (2^{-f}-Tn[f]) rounded to double precision
//
// r=r`+xL*L2EH+|x|*L2EL, |r|<2^{-9}+2^{-14},
// for |x| in [23/64,3*2^7)
// e^{-2*|x|}=2^{-k-f}*2^{-r} ~ 2^{-k}*(Tn+Dn)*(1+p)=(T0+D0)*(1+p)
//
// For |x| in [2^{-4},22):
// 2^{-r}-1 ~ p=c1*r+c2*r^2+..+c5*r^5
// Let R=1/(1+T0+p*T0), truncated to 35 significant bits
// R=1/(1+T0+D0+p*(T0+D0))*(1+eps), |eps|<2^{-33}
// 1+T0+D0+p*(T0+D0)=KH+KL, where
// KH=(1+T0+c1*r*T0)_high (leading 17 bits)
// KL=T0_low+D0+(c1*r*T0)_low+c1*r*D0+(c2*r^2+..c5*r^5)*T0
// eps ~ (R*KH-1)+R*KL
// 1/(1+T0+D0+p*(T0+D0)) ~ R-R*eps
// The result is approximated as (1-T0-D0-(T0+D0)*p)*(R-R*eps)
// 1-T0-D0-(T0+D0)*p=-((KH-2)+KL)
// The result is formed as
// (KH-2)*R+(-(KH-2)*R*eps+(KL*R-KL*R*eps)), with the correct sign
// set at the end
//
// For |x| in [2^{-64},2^{-4}):
// A Taylor series expansion is used (x+p3*x^3+..+p13*x^{13})
//
// For |x|<2^{-64}: x is returned
//
// For |x|>=22: return +/-1
//
// Special cases:
// tanh(NaN) = quiet NaN, and raise invalid exception
// tanh(+/-INF) = +/-1
// tanh(+/-0) = +/-0
//
/******************************************************************************/
// Constant pool for the tanh stub. Every table is an array of 32-bit words;
// consecutive word pairs form one IEEE-754 double, low word first.

// Mask 0x7FFFFFFF_F8000000: clears the sign bit and the low 27 mantissa bits
// of a double. ANDed with x to obtain xH, the high part of |x|.
ATTRIBUTE_ALIGNED(4) static const juint _HALFMASK[] =
{
0xF8000000UL, 0x7FFFFFFFUL
};
// Bit pattern of the double 1.0 (0x3FF00000_00000000).
ATTRIBUTE_ALIGNED(4) static const juint _ONEMASK[] =
{
0x00000000UL, 0x3FF00000UL
};
// Bit pattern of the double 2.0 (0x40000000_00000000).
ATTRIBUTE_ALIGNED(4) static const juint _TWOMASK[] =
{
0x00000000UL, 0x40000000UL
};
// Per-lane mask 0xFFFFFFF0_00000000: keeps sign, exponent and the top 16
// explicit mantissa bits of a double. Used as a 16-byte memory operand of
// andpd, hence the 16-byte alignment.
ATTRIBUTE_ALIGNED(16) static const juint _MASK3[] =
{
0x00000000UL, 0xFFFFFFF0UL, 0x00000000UL, 0xFFFFFFF0UL
};
// Per-lane mask 0xFFFFFFFF_FFFC0000: clears the low 18 mantissa bits of a
// double. Applied to the reciprocal R (see "truncated to 35 significant bits"
// in the algorithm description).
ATTRIBUTE_ALIGNED(16) static const juint _RMASK[] =
{
0xFFFC0000UL, 0xFFFFFFFFUL, 0xFFFC0000UL, 0xFFFFFFFFUL
};
// Two doubles: the high part at offset 0 and the low part at offset 8 of the
// log2(e)-based multiplier (L2EH and L2EL in the algorithm description).
// NOTE(review): the exponent field of the high part (0x408) suggests the
// value is pre-scaled by 2^9 -- confirm against the original LIBM source.
ATTRIBUTE_ALIGNED(16) static const juint _L2E[] =
{
0x60000000UL, 0x40871547UL, 0xF85DDF44UL, 0x3EE4AE0BUL
};
// +/- 1.5 * 2^52. The stub loads only the first (positive) double, adds it
// and subtracts it again to round a value to an integer in the FP domain.
ATTRIBUTE_ALIGNED(16) static const juint _Shifter[] =
{
0x00000000UL, 0x43380000UL, 0x00000000UL, 0xC3380000UL
};
// Six doubles read by the stub as cv, cv + 16 (two packed pairs) and cv + 32
// (scalar). Presumably the coefficients c1..c5 of the polynomial p from the
// algorithm description plus zero padding -- confirm the exact ordering
// against the original LIBM source.
ATTRIBUTE_ALIGNED(16) static const juint _cv[] =
{
0xE78A6731UL, 0xBCD5D87FUL, 0xD704A0BFUL, 0xBE2C6B08UL, 0x6FBA4E77UL,
0x3D83B2ABUL, 0xFF82C58EUL, 0x3ECEBFBDUL, 0xFEFA39EFUL, 0xBF662E42UL,
0x00000000UL, 0x00000000UL
};
// Six doubles read by the stub as three packed pairs (pv, pv + 16, pv + 32)
// on the small-argument path. The last pair is 2/15 and -1/3, i.e. the x^5
// and x^3 Taylor coefficients of tanh; the remaining entries are presumably
// the higher-order coefficients up to p13. Only unaligned loads (movdqu) are
// used on this table, so 4-byte alignment is sufficient.
ATTRIBUTE_ALIGNED(4) static const juint _pv[] =
{
0x0E157DDFUL, 0x3F6D6D3DUL, 0x1BA1BA1CUL, 0xBFABA1BAUL, 0x55E6C23DUL,
0xBF8226E3UL, 0x11111111UL, 0x3FC11111UL, 0x882C10FAUL, 0x3F9664F4UL,
0x55555555UL, 0xBFD55555UL
};
// Lookup table for 2^{-f} (Tn[f] and Dn[f] in the algorithm description).
//
// Layout: 256 entries of 16 bytes each (1024 words in total). Each entry is
// a pair of doubles stored low word first: the leading approximation Tn
// followed by the correction term Dn. Entry 0 is {1.0, 0.0}.
//
// The stub indexes the table with the low 8 bits of the integer obtained
// from xH*L2EH and fetches a whole entry with one 16-byte load.
ATTRIBUTE_ALIGNED(16) static const juint _T2_neg_f[] =
{
0x00000000UL, 0x3FF00000UL, 0x00000000UL, 0x00000000UL, 0x6B2A23D9UL, 0x3FEFE9D9UL,
0x7442FDE3UL, 0x3C64A603UL, 0x2B8F71F1UL, 0x3FEFD3C2UL, 0x966579E7UL, 0x3C52EB74UL,
0x3692D514UL, 0x3FEFBDBAUL, 0x15098EB6UL, 0xBC696773UL, 0x819E90D8UL, 0x3FEFA7C1UL,
0xF3A5931EUL, 0x3C774853UL, 0x02243C89UL, 0x3FEF91D8UL, 0xA779F689UL, 0xBC512EA8UL,
0xAD9CBE14UL, 0x3FEF7BFDUL, 0xD006350AUL, 0xBC8DBB12UL, 0x798844F8UL, 0x3FEF6632UL,
0x3539343EUL, 0x3C8FA37BUL, 0x5B6E4540UL, 0x3FEF5076UL, 0x2DD8A18BUL, 0x3C89D3E1UL,
0x48DD7274UL, 0x3FEF3AC9UL, 0x3ED837DEUL, 0xBC695A5AUL, 0x376BBA97UL, 0x3FEF252BUL,
0xBF0D8E43UL, 0x3C83A1A5UL, 0x1CB6412AUL, 0x3FEF0F9CUL, 0x65181D45UL, 0xBC832200UL,
0xEE615A27UL, 0x3FEEFA1BUL, 0x86A4B6B0UL, 0x3C8DC7F4UL, 0xA2188510UL, 0x3FEEE4AAUL,
0xA487568DUL, 0x3C81C68DUL, 0x2D8E67F1UL, 0x3FEECF48UL, 0xB411AD8CUL, 0xBC8C93F3UL,
0x867CCA6EUL, 0x3FEEB9F4UL, 0x2293E4F2UL, 0x3C84832FUL, 0xA2A490DAUL, 0x3FEEA4AFUL,
0x179C2893UL, 0xBC8E9C23UL, 0x77CDB740UL, 0x3FEE8F79UL, 0x80B054B1UL, 0xBC810894UL,
0xFBC74C83UL, 0x3FEE7A51UL, 0xCA0C8DE2UL, 0x3C82D522UL, 0x24676D76UL, 0x3FEE6539UL,
0x7522B735UL, 0xBC763FF8UL, 0xE78B3FF6UL, 0x3FEE502EUL, 0x80A9CC8FUL, 0x3C739E89UL,
0x3B16EE12UL, 0x3FEE3B33UL, 0x31FDC68BUL, 0xBC89F4A4UL, 0x14F5A129UL, 0x3FEE2646UL,
0x817A1496UL, 0xBC87B627UL, 0x6B197D17UL, 0x3FEE1167UL, 0xBD5C7F44UL, 0xBC62B529UL,
0x337B9B5FUL, 0x3FEDFC97UL, 0x4F184B5CUL, 0xBC81A5CDUL, 0x641C0658UL, 0x3FEDE7D5UL,
0x8E79BA8FUL, 0xBC8CA552UL, 0xF301B460UL, 0x3FEDD321UL, 0x78F018C3UL, 0x3C82DA57UL,
0xD63A8315UL, 0x3FEDBE7CUL, 0x926B8BE4UL, 0xBC8B76F1UL, 0x03DB3285UL, 0x3FEDA9E6UL,
0x696DB532UL, 0x3C8C2300UL, 0x71FF6075UL, 0x3FED955DUL, 0xBB9AF6BEUL, 0x3C8A052DUL,
0x16C98398UL, 0x3FED80E3UL, 0x8BEDDFE8UL, 0xBC811EC1UL, 0xE862E6D3UL, 0x3FED6C76UL,
0x4A8165A0UL, 0x3C4FE87AUL, 0xDCFBA487UL, 0x3FED5818UL, 0xD75B3707UL, 0x3C72ED02UL,
0xEACAA1D6UL, 0x3FED43C8UL, 0xBF5A1614UL, 0x3C83DB53UL, 0x080D89F2UL, 0x3FED2F87UL,
0x719D8578UL, 0xBC8D487BUL, 0x2B08C968UL, 0x3FED1B53UL, 0x219A36EEUL, 0x3C855636UL,
0x4A07897CUL, 0x3FED072DUL, 0x43797A9CUL, 0xBC8CBC37UL, 0x5B5BAB74UL, 0x3FECF315UL,
0xB86DFF57UL, 0xBC8A08E9UL, 0x555DC3FAUL, 0x3FECDF0BUL, 0x53829D72UL, 0xBC7DD83BUL,
0x2E6D1675UL, 0x3FECCB0FUL, 0x86009093UL, 0xBC6D220FUL, 0xDCEF9069UL, 0x3FECB720UL,
0xD1E949DCUL, 0x3C6503CBUL, 0x5751C4DBUL, 0x3FECA340UL, 0xD10D08F5UL, 0xBC77F2BEUL,
0x9406E7B5UL, 0x3FEC8F6DUL, 0x48805C44UL, 0x3C61ACBCUL, 0x8988C933UL, 0x3FEC7BA8UL,
0xBE255559UL, 0xBC7E76BBUL, 0x2E57D14BUL, 0x3FEC67F1UL, 0xFF483CADUL, 0x3C82884DUL,
0x78FAFB22UL, 0x3FEC5447UL, 0x2493B5AFUL, 0x3C812F07UL, 0x5FFFD07AUL, 0x3FEC40ABUL,
0xE083C60AUL, 0x3C8B4537UL, 0xD9FA652CUL, 0x3FEC2D1CUL, 0x17C8A5D7UL, 0xBC86E516UL,
0xDD85529CUL, 0x3FEC199BUL, 0x895048DDUL, 0x3C711065UL, 0x6141B33DUL, 0x3FEC0628UL,
0xA1FBCA34UL, 0xBC7D8A5AUL, 0x5BD71E09UL, 0x3FEBF2C2UL, 0x3F6B9C73UL, 0xBC8EFDCAUL,
0xC3F3A207UL, 0x3FEBDF69UL, 0x60EA5B53UL, 0xBC2C2623UL, 0x904BC1D2UL, 0x3FEBCC1EUL,
0x7A2D9E84UL, 0x3C723DD0UL, 0xB79A6F1FUL, 0x3FEBB8E0UL, 0xC9696204UL, 0xBC2F52D1UL,
0x30A1064AUL, 0x3FEBA5B0UL, 0x0E54292EUL, 0xBC8EFCD3UL, 0xF22749E4UL, 0x3FEB928CUL,
0x54CB65C6UL, 0xBC8B7216UL, 0xF2FB5E47UL, 0x3FEB7F76UL, 0x7E54AC3BUL, 0xBC65584FUL,
0x29F1C52AUL, 0x3FEB6C6EUL, 0x52883F6EUL, 0x3C82A8F3UL, 0x8DE5593AUL, 0x3FEB5972UL,
0xBBBA6DE3UL, 0xBC8C71DFUL, 0x15B749B1UL, 0x3FEB4684UL, 0xE9DF7C90UL, 0xBC6F763DUL,
0xB84F15FBUL, 0x3FEB33A2UL, 0x3084D708UL, 0xBC52805EUL, 0x6C9A8952UL, 0x3FEB20CEUL,
0x4A0756CCUL, 0x3C84DD02UL, 0x298DB666UL, 0x3FEB0E07UL, 0x4C80E425UL, 0xBC8BDEF5UL,
0xE622F2FFUL, 0x3FEAFB4CUL, 0x0F315ECDUL, 0xBC84B2FCUL, 0x995AD3ADUL, 0x3FEAE89FUL,
0x345DCC81UL, 0x3C87A1CDUL, 0x3A3C2774UL, 0x3FEAD5FFUL, 0xB6B1B8E5UL, 0x3C87EF3BUL,
0xBFD3F37AUL, 0x3FEAC36BUL, 0xCAE76CD0UL, 0xBC7F9234UL, 0x21356EBAUL, 0x3FEAB0E5UL,
0xDAE94545UL, 0x3C789C31UL, 0x5579FDBFUL, 0x3FEA9E6BUL, 0x0EF7FD31UL, 0x3C80FAC9UL,
0x53C12E59UL, 0x3FEA8BFEUL, 0xB2BA15A9UL, 0xBC84F867UL, 0x1330B358UL, 0x3FEA799EUL,
0xCAC563C7UL, 0x3C8BCB7EUL, 0x8AF46052UL, 0x3FEA674AUL, 0x30670366UL, 0x3C550F56UL,
0xB23E255DUL, 0x3FEA5503UL, 0xDB8D41E1UL, 0xBC8D2F6EUL, 0x80460AD8UL, 0x3FEA42C9UL,
0x589FB120UL, 0xBC8AA780UL, 0xEC4A2D33UL, 0x3FEA309BUL, 0x7DDC36ABUL, 0x3C86305CUL,
0xED8EB8BBUL, 0x3FEA1E7AUL, 0xEE8BE70EUL, 0x3C8C6618UL, 0x7B5DE565UL, 0x3FEA0C66UL,
0x5D1CD533UL, 0xBC835949UL, 0x8D07F29EUL, 0x3FE9FA5EUL, 0xAAF1FACEUL, 0xBC74A9CEUL,
0x19E32323UL, 0x3FE9E863UL, 0x78E64C6EUL, 0x3C6824CAUL, 0x194BB8D5UL, 0x3FE9D674UL,
0xA3DD8233UL, 0xBC8516BEUL, 0x82A3F090UL, 0x3FE9C491UL, 0xB071F2BEUL, 0x3C6C7C46UL,
0x4D53FE0DUL, 0x3FE9B2BBUL, 0x4DF6D518UL, 0xBC8DD84EUL, 0x70CA07BAUL, 0x3FE9A0F1UL,
0x91CEE632UL, 0xBC8173BDUL, 0xE47A22A2UL, 0x3FE98F33UL, 0xA24C78ECUL, 0x3C6CABDAUL,
0x9FDE4E50UL, 0x3FE97D82UL, 0x7C1B85D1UL, 0xBC8D185BUL, 0x9A7670B3UL, 0x3FE96BDDUL,
0x7F19C896UL, 0xBC4BA596UL, 0xCBC8520FUL, 0x3FE95A44UL, 0x96A5F039UL, 0xBC664B7CUL,
0x2B5F98E5UL, 0x3FE948B8UL, 0x797D2D99UL, 0xBC7DC3D6UL, 0xB0CDC5E5UL, 0x3FE93737UL,
0x81B57EBCUL, 0xBC575FC7UL, 0x53AA2FE2UL, 0x3FE925C3UL, 0xA639DB7FUL, 0xBC73455FUL,
0x0B91FFC6UL, 0x3FE9145BUL, 0x2E582524UL, 0xBC8DD679UL, 0xD0282C8AUL, 0x3FE902FEUL,
0x85FE3FD2UL, 0x3C8592CAUL, 0x99157736UL, 0x3FE8F1AEUL, 0xA2E3976CUL, 0x3C75CC13UL,
0x5E0866D9UL, 0x3FE8E06AUL, 0x6FC9B2E6UL, 0xBC87114AUL, 0x16B5448CUL, 0x3FE8CF32UL,
0x32E9E3AAUL, 0xBC60D55EUL, 0xBAD61778UL, 0x3FE8BE05UL, 0xFC43446EUL, 0x3C8ECB5EUL,
0x422AA0DBUL, 0x3FE8ACE5UL, 0x56864B27UL, 0x3C86E9F1UL, 0xA478580FUL, 0x3FE89BD0UL,
0x4475202AUL, 0x3C8D5395UL, 0xD98A6699UL, 0x3FE88AC7UL, 0xF37CB53AUL, 0x3C8994C2UL,
0xD931A436UL, 0x3FE879CAUL, 0xD2DB47BDUL, 0x3C75D2D7UL, 0x9B4492EDUL, 0x3FE868D9UL,
0x9BD4F6BAUL, 0xBC8FC6F8UL, 0x179F5B21UL, 0x3FE857F4UL, 0xF8B216D0UL, 0xBC4BA748UL,
0x4623C7ADUL, 0x3FE8471AUL, 0xA341CDFBUL, 0xBC78D684UL, 0x1EB941F7UL, 0x3FE8364CUL,
0x31DF2BD5UL, 0x3C899B9AUL, 0x994CCE13UL, 0x3FE82589UL, 0xD41532D8UL, 0xBC8D4C1DUL,
0xADD106D9UL, 0x3FE814D2UL, 0x0D151D4DUL, 0x3C846437UL, 0x543E1A12UL, 0x3FE80427UL,
0x626D972BUL, 0xBC827C86UL, 0x8491C491UL, 0x3FE7F387UL, 0xCF9311AEUL, 0xBC707F11UL,
0x36CF4E62UL, 0x3FE7E2F3UL, 0xBA15797EUL, 0x3C605D02UL, 0x62FF86F0UL, 0x3FE7D26AUL,
0xFB72B8B4UL, 0x3C81BDDBUL, 0x0130C132UL, 0x3FE7C1EDUL, 0xD1164DD6UL, 0x3C8F124CUL,
0x0976CFDBUL, 0x3FE7B17BUL, 0x8468DC88UL, 0xBC8BEBB5UL, 0x73EB0187UL, 0x3FE7A114UL,
0xEE04992FUL, 0xBC741577UL, 0x38AC1CF6UL, 0x3FE790B9UL, 0x62AADD3EUL, 0x3C8349A8UL,
0x4FDE5D3FUL, 0x3FE78069UL, 0x0A02162DUL, 0x3C8866B8UL, 0xB1AB6E09UL, 0x3FE77024UL,
0x169147F8UL, 0x3C8B7877UL, 0x564267C9UL, 0x3FE75FEBUL, 0x57316DD3UL, 0xBC802459UL,
0x35D7CBFDUL, 0x3FE74FBDUL, 0x618A6E1CUL, 0x3C8047FDUL, 0x48A58174UL, 0x3FE73F9AUL,
0x6C65D53CUL, 0xBC80A8D9UL, 0x86EAD08AUL, 0x3FE72F82UL, 0x2CD62C72UL, 0xBC820AA0UL,
0xE8EC5F74UL, 0x3FE71F75UL, 0x86887A99UL, 0xBC716E47UL, 0x66F42E87UL, 0x3FE70F74UL,
0xD45AA65FUL, 0x3C49D644UL, 0xF9519484UL, 0x3FE6FF7DUL, 0x25860EF6UL, 0xBC783C0FUL,
0x98593AE5UL, 0x3FE6EF92UL, 0x9E1AC8B2UL, 0xBC80B974UL, 0x3C651A2FUL, 0x3FE6DFB2UL,
0x683C88ABUL, 0xBC5BBE3AUL, 0xDDD47645UL, 0x3FE6CFDCUL, 0xB6F17309UL, 0x3C8C7AA9UL,
0x750BDABFUL, 0x3FE6C012UL, 0x67FF0B0DUL, 0xBC628956UL, 0xFA75173EUL, 0x3FE6B052UL,
0x2C9A9D0EUL, 0x3C6A38F5UL, 0x667F3BCDUL, 0x3FE6A09EUL, 0x13B26456UL, 0xBC8BDD34UL,
0xB19E9538UL, 0x3FE690F4UL, 0x9AEB445DUL, 0x3C7804BDUL, 0xD44CA973UL, 0x3FE68155UL,
0x44F73E65UL, 0x3C5038AEUL, 0xC70833F6UL, 0x3FE671C1UL, 0x586C6134UL, 0xBC7E8732UL,
0x82552225UL, 0x3FE66238UL, 0x87591C34UL, 0xBC8BB609UL, 0xFEBC8FB7UL, 0x3FE652B9UL,
0xC9A73E09UL, 0xBC8AE3D5UL, 0x34CCC320UL, 0x3FE64346UL, 0x759D8933UL, 0xBC7C483CUL,
0x1D1929FDUL, 0x3FE633DDUL, 0xBEB964E5UL, 0x3C884710UL, 0xB03A5585UL, 0x3FE6247EUL,
0x7E40B497UL, 0xBC8383C1UL, 0xE6CDF6F4UL, 0x3FE6152AUL, 0x4AB84C27UL, 0x3C8E4B3EUL,
0xB976DC09UL, 0x3FE605E1UL, 0x9B56DE47UL, 0xBC83E242UL, 0x20DCEB71UL, 0x3FE5F6A3UL,
0xE3CDCF92UL, 0xBC79EADDUL, 0x15AD2148UL, 0x3FE5E76FUL, 0x3080E65EUL, 0x3C8BA6F9UL,
0x90998B93UL, 0x3FE5D845UL, 0xA8B45643UL, 0xBC8CD6A7UL, 0x8A5946B7UL, 0x3FE5C926UL,
0x816986A2UL, 0x3C2C4B1BUL, 0xFBA87A03UL, 0x3FE5BA11UL, 0x4C233E1AUL, 0xBC8B77A1UL,
0xDD485429UL, 0x3FE5AB07UL, 0x054647ADUL, 0x3C86324CUL, 0x27FF07CCUL, 0x3FE59C08UL,
0xE467E60FUL, 0xBC87E2CEUL, 0xD497C7FDUL, 0x3FE58D12UL, 0x5B9A1DE8UL, 0x3C7295E1UL,
0xDBE2C4CFUL, 0x3FE57E27UL, 0x8A57B9C4UL, 0xBC80B98CUL, 0x36B527DAUL, 0x3FE56F47UL,
0x011D93ADUL, 0x3C89BB2CUL, 0xDDE910D2UL, 0x3FE56070UL, 0x168EEBF0UL, 0xBC80FB6EUL,
0xCA5D920FUL, 0x3FE551A4UL, 0xEFEDE59BUL, 0xBC7D689CUL, 0xF4F6AD27UL, 0x3FE542E2UL,
0x192D5F7EUL, 0x3C77926DUL, 0x569D4F82UL, 0x3FE5342BUL, 0x1DB13CADUL, 0xBC707ABEUL,
0xE83F4EEFUL, 0x3FE5257DUL, 0x43EFEF71UL, 0xBC6C998DUL, 0xA2CF6642UL, 0x3FE516DAUL,
0x69BD93EFUL, 0xBC7F7685UL, 0x7F4531EEUL, 0x3FE50841UL, 0x49B7465FUL, 0x3C6A249BUL,
0x769D2CA7UL, 0x3FE4F9B2UL, 0xD25957E3UL, 0xBC84B309UL, 0x81D8ABFFUL, 0x3FE4EB2DUL,
0x2E5D7A52UL, 0xBC85257DUL, 0x99FDDD0DUL, 0x3FE4DCB2UL, 0xBC6A7833UL, 0x3C88ECDBUL,
0xB817C114UL, 0x3FE4CE41UL, 0x690ABD5DUL, 0x3C805E29UL, 0xD5362A27UL, 0x3FE4BFDAUL,
0xAFEC42E2UL, 0x3C6D4397UL, 0xEA6DB7D7UL, 0x3FE4B17DUL, 0x7F2897F0UL, 0xBC7125B8UL,
0xF0D7D3DEUL, 0x3FE4A32AUL, 0xF3D1BE56UL, 0x3C89CB62UL, 0xE192AED2UL, 0x3FE494E1UL,
0x5E499EA0UL, 0xBC73B289UL, 0xB5C13CD0UL, 0x3FE486A2UL, 0xB69062F0UL, 0x3C63C1A3UL,
0x668B3237UL, 0x3FE4786DUL, 0xED445733UL, 0xBC8C20F0UL, 0xED1D0057UL, 0x3FE46A41UL,
0xD1648A76UL, 0x3C8C944BUL, 0x42A7D232UL, 0x3FE45C20UL, 0x82FB1F8EUL, 0xBC586419UL,
0x6061892DUL, 0x3FE44E08UL, 0x04EF80D0UL, 0x3C389B7AUL, 0x3F84B9D4UL, 0x3FE43FFAUL,
0x9704C003UL, 0x3C7880BEUL, 0xD950A897UL, 0x3FE431F5UL, 0xE35F7999UL, 0xBC71C7DDUL,
0x2709468AUL, 0x3FE423FBUL, 0xC0B314DDUL, 0xBC88462DUL, 0x21F72E2AUL, 0x3FE4160AUL,
0x1C309278UL, 0xBC4EF369UL, 0xC367A024UL, 0x3FE40822UL, 0xB6F4D048UL, 0x3C7BDDF8UL,
0x04AC801CUL, 0x3FE3FA45UL, 0xF956F9F3UL, 0xBC87D023UL, 0xDF1C5175UL, 0x3FE3EC70UL,
0x7B8C9BCAUL, 0xBC7AF663UL, 0x4C123422UL, 0x3FE3DEA6UL, 0x11F09EBCUL, 0x3C7ADA09UL,
0x44EDE173UL, 0x3FE3D0E5UL, 0x8C284C71UL, 0x3C6FE8D0UL, 0xC313A8E5UL, 0x3FE3C32DUL,
0x375D29C3UL, 0xBC8EFFF8UL, 0xBFEC6CF4UL, 0x3FE3B57FUL, 0xE26FFF18UL, 0x3C854C66UL,
0x34E59FF7UL, 0x3FE3A7DBUL, 0xD661F5E3UL, 0xBC65E436UL, 0x1B7140EFUL, 0x3FE39A40UL,
0xFC8E2934UL, 0xBC89A9A5UL, 0x6D05D866UL, 0x3FE38CAEUL, 0x3C9904BDUL, 0xBC8E958DUL,
0x231E754AUL, 0x3FE37F26UL, 0x9ECEB23CUL, 0xBC89F5CAUL, 0x373AA9CBUL, 0x3FE371A7UL,
0xBF42EAE2UL, 0xBC863AEAUL, 0xA2DE883BUL, 0x3FE36431UL, 0xA06CB85EUL, 0xBC7C3144UL,
0x5F929FF1UL, 0x3FE356C5UL, 0x5C4E4628UL, 0xBC7B5CEEUL, 0x66E3FA2DUL, 0x3FE34962UL,
0x930881A4UL, 0xBC735A75UL, 0xB26416FFUL, 0x3FE33C08UL, 0x843659A6UL, 0x3C832721UL,
0x3BA8EA32UL, 0x3FE32EB8UL, 0x3CB4F318UL, 0xBC8C45E8UL, 0xFC4CD831UL, 0x3FE32170UL,
0x8E18047CUL, 0x3C7A9CE7UL, 0xEDEEB2FDUL, 0x3FE31432UL, 0xF3F3FCD1UL, 0x3C7959A3UL,
0x0A31B715UL, 0x3FE306FEUL, 0xD23182E4UL, 0x3C76F46AUL, 0x4ABD886BUL, 0x3FE2F9D2UL,
0x532BDA93UL, 0xBC553C55UL, 0xA93E2F56UL, 0x3FE2ECAFUL, 0x45D52383UL, 0x3C61CA0FUL,
0x1F641589UL, 0x3FE2DF96UL, 0xFBBCE198UL, 0x3C8D16CFUL, 0xA6E4030BUL, 0x3FE2D285UL,
0x54DB41D5UL, 0x3C800247UL, 0x39771B2FUL, 0x3FE2C57EUL, 0xA6EB5124UL, 0xBC850145UL,
0xD0DAD990UL, 0x3FE2B87FUL, 0xD6381AA4UL, 0xBC310ADCUL, 0x66D10F13UL, 0x3FE2AB8AUL,
0x191690A7UL, 0xBC895743UL, 0xF51FDEE1UL, 0x3FE29E9DUL, 0xAFAD1255UL, 0x3C7612E8UL,
0x7591BB70UL, 0x3FE291BAUL, 0x28401CBDUL, 0xBC72CC72UL, 0xE1F56381UL, 0x3FE284DFUL,
0x8C3F0D7EUL, 0xBC8A4C3AUL, 0x341DDF29UL, 0x3FE2780EUL, 0x05F9E76CUL, 0x3C8E067CUL,
0x65E27CDDUL, 0x3FE26B45UL, 0x9940E9D9UL, 0x3C72BD33UL, 0x711ECE75UL, 0x3FE25E85UL,
0x4AC31B2CUL, 0x3C83E1A2UL, 0x4FB2A63FUL, 0x3FE251CEUL, 0xBEF4F4A4UL, 0x3C7AC155UL,
0xFB82140AUL, 0x3FE2451FUL, 0x911CA996UL, 0x3C7ACFCCUL, 0x6E756238UL, 0x3FE2387AUL,
0xB6C70573UL, 0x3C89B07EUL, 0xA27912D1UL, 0x3FE22BDDUL, 0x5577D69FUL, 0x3C7D34FBUL,
0x917DDC96UL, 0x3FE21F49UL, 0x9494A5EEUL, 0x3C72A97EUL, 0x3578A819UL, 0x3FE212BEUL,
0x2CFCAAC9UL, 0x3C83592DUL, 0x88628CD6UL, 0x3FE2063BUL, 0x814A8495UL, 0x3C7DC775UL,
0x8438CE4DUL, 0x3FE1F9C1UL, 0xA097AF5CUL, 0xBC8BF524UL, 0x22FCD91DUL, 0x3FE1ED50UL,
0x027BB78CUL, 0xBC81DF98UL, 0x5EB44027UL, 0x3FE1E0E7UL, 0x088CB6DEUL, 0xBC86FDD8UL,
0x3168B9AAUL, 0x3FE1D487UL, 0x00A2643CUL, 0x3C8E016EUL, 0x95281C6BUL, 0x3FE1C82FUL,
0x8010F8C9UL, 0x3C800977UL, 0x84045CD4UL, 0x3FE1BBE0UL, 0x352EF607UL, 0xBC895386UL,
0xF8138A1CUL, 0x3FE1AF99UL, 0xA4B69280UL, 0x3C87BF85UL, 0xEB6FCB75UL, 0x3FE1A35BUL,
0x7B4968E4UL, 0x3C7E5B4CUL, 0x58375D2FUL, 0x3FE19726UL, 0x85F17E08UL, 0x3C84AADDUL,
0x388C8DEAUL, 0x3FE18AF9UL, 0xD1970F6CUL, 0xBC811023UL, 0x8695BBC0UL, 0x3FE17ED4UL,
0xE2AC5A64UL, 0x3C609E3FUL, 0x3C7D517BUL, 0x3FE172B8UL, 0xB9D78A76UL, 0xBC719041UL,
0x5471C3C2UL, 0x3FE166A4UL, 0x82EA1A32UL, 0x3C48F23BUL, 0xC8A58E51UL, 0x3FE15A98UL,
0xB9EEAB0AUL, 0x3C72406AUL, 0x934F312EUL, 0x3FE14E95UL, 0x39BF44ABUL, 0xBC7B91E8UL,
0xAEA92DE0UL, 0x3FE1429AUL, 0x9AF1369EUL, 0xBC832FBFUL, 0x14F204ABUL, 0x3FE136A8UL,
0xBA48DCF0UL, 0xBC57108FUL, 0xC06C31CCUL, 0x3FE12ABDUL, 0xB36CA5C7UL, 0xBC41B514UL,
0xAB5E2AB6UL, 0x3FE11EDBUL, 0xF703FB72UL, 0xBC8CA454UL, 0xD0125B51UL, 0x3FE11301UL,
0x39449B3AUL, 0xBC86C510UL, 0x28D7233EUL, 0x3FE10730UL, 0x1692FDD5UL, 0x3C7D46EBUL,
0xAFFED31BUL, 0x3FE0FB66UL, 0xC44EBD7BUL, 0xBC5B9BEDUL, 0x5FDFA9C5UL, 0x3FE0EFA5UL,
0xBC54021BUL, 0xBC849DB9UL, 0x32D3D1A2UL, 0x3FE0E3ECUL, 0x27C57B52UL, 0x3C303A17UL,
0x23395DECUL, 0x3FE0D83BUL, 0xE43F316AUL, 0xBC8BC14DUL, 0x2B7247F7UL, 0x3FE0CC92UL,
0x16E24F71UL, 0x3C801EDCUL, 0x45E46C85UL, 0x3FE0C0F1UL, 0x06D21CEFUL, 0x3C84F989UL,
0x6CF9890FUL, 0x3FE0B558UL, 0x4ADC610BUL, 0x3C88A62EUL, 0x9B1F3919UL, 0x3FE0A9C7UL,
0x873D1D38UL, 0x3C75D16CUL, 0xCAC6F383UL, 0x3FE09E3EUL, 0x18316136UL, 0x3C814878UL,
0xF66607E0UL, 0x3FE092BDUL, 0x800A3FD1UL, 0xBC868063UL, 0x18759BC8UL, 0x3FE08745UL,
0x4BB284FFUL, 0x3C5186BEUL, 0x2B72A836UL, 0x3FE07BD4UL, 0x54458700UL, 0x3C732334UL,
0x29DDF6DEUL, 0x3FE0706BUL, 0xE2B13C27UL, 0xBC7C91DFUL, 0x0E3C1F89UL, 0x3FE0650AUL,
0x5799C397UL, 0xBC85CB7BUL, 0xD3158574UL, 0x3FE059B0UL, 0xA475B465UL, 0x3C7D73E2UL,
0x72F654B1UL, 0x3FE04E5FUL, 0x3AA0D08CUL, 0x3C74C379UL, 0xE86E7F85UL, 0x3FE04315UL,
0x1977C96EUL, 0xBC80A31CUL, 0x2E11BBCCUL, 0x3FE037D4UL, 0xEEADE11AUL, 0x3C556811UL,
0x3E778061UL, 0x3FE02C9AUL, 0x535B085DUL, 0xBC619083UL, 0x143B0281UL, 0x3FE02168UL,
0x0FC54EB6UL, 0xBC72BF31UL, 0xA9FB3335UL, 0x3FE0163DUL, 0x9AB8CDB7UL, 0x3C8B6129UL,
0xFA5ABCBFUL, 0x3FE00B1AUL, 0xA7609F71UL, 0xBC74F6B2UL
};
#define __ _masm->

// Generates (or fetches from the archive) the x86_64 stub that computes
// tanh(x) for a double-precision argument.
//
// Stub contract, as visible from the emitted code:
//   - the argument is read from xmm0 and the result is left in xmm0;
//   - rax, rcx, rdx, r8, r11 (scratch for ExternalAddress operands) and
//     xmm1..xmm7 are overwritten;
//   - a frame is set up with enter()/leave() so the stub can be stack-walked.
//
// Dispatch is done on the top 16 bits of x with the sign removed (called the
// "exponent word" below: biased exponent << 4 | top 4 mantissa bits):
//   exponent word >= 16438           -> |x| >= 22, INF or NaN
//   exponent word in [16304, 16438)  -> main path, |x| in [2^-4, 22)
//   exponent word in [15344, 16304)  -> Taylor series, |x| in [2^-64, 2^-4)
//   exponent word <  15344           -> result is x itself
//
// Returns the entry address of the stub. When load_archive_data() yields a
// non-null address, that address is returned and no code is emitted.
address StubGenerator::generate_libmTanh() {
  StubId stub_id = StubId::stubgen_dtanh_id;
  int entry_count = StubInfo::entry_count(stub_id);
  assert(entry_count == 1, "sanity check");
  address start = load_archive_data(stub_id);
  if (start != nullptr) {
    return start;
  }
  StubCodeMark mark(this, stub_id);
  start = __ pc();

  Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1;
  Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1;
  Label B1_2, B1_4;

  address HALFMASK = (address)_HALFMASK;
  address ONEMASK  = (address)_ONEMASK;
  address TWOMASK  = (address)_TWOMASK;
  address MASK3    = (address)_MASK3;
  address RMASK    = (address)_RMASK;
  address L2E      = (address)_L2E;
  address Shifter  = (address)_Shifter;
  address cv       = (address)_cv;
  address pv       = (address)_pv;
  address T2_neg_f = (address) _T2_neg_f;

  __ enter(); // required for proper stackwalking of RuntimeStub frame

  __ bind(B1_2);
  // rcx <- exponent word of x, rdx <- sign bit of x (bit 15 of the top word).
  __ pextrw(rcx, xmm0, 3);
  __ movl(rdx, 32768);
  __ andl(rdx, rcx);
  __ andl(rcx, 32767);
  __ cmpl(rcx, 16438);
  __ jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_1); // Branch only if |x| >= 22

  // xmm3 <- xH (high part of |x|), xmm4 <- |x|, xmm5 <- |x| in both lanes.
  __ movsd(xmm3, ExternalAddress(HALFMASK), r11 /*rscratch*/);
  __ xorpd(xmm4, xmm4);
  __ movsd(xmm1, ExternalAddress(L2E), r11 /*rscratch*/);
  __ movsd(xmm2, ExternalAddress(L2E + 8), r11 /*rscratch*/);
  __ movl(rax, 32768);
  __ pinsrw(xmm4, rax, 3);
  __ movsd(xmm6, ExternalAddress(Shifter), r11 /*rscratch*/);
  __ andpd(xmm3, xmm0);
  __ andnpd(xmm4, xmm0);
  __ pshufd(xmm5, xmm4, 68);
  // Rebase the exponent word so that [2^-4, 22) maps to [0, 134).
  __ subl(rcx, 16304);
  __ cmpl(rcx, 134);
  __ jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_1); // Branch only if |x| is not in [2^{-4},22)

  // ---- Main path: |x| in [2^-4, 22) --------------------------------------
  // Argument reduction: xmm4 <- xL, xmm3 <- xH*L2EH, rax <- int(xH*L2EH),
  // xmm2 <- reduced argument r (see the algorithm description).
  __ subsd(xmm4, xmm3);
  __ mulsd(xmm3, xmm1);
  __ mulsd(xmm2, xmm5);
  __ cvtsd2siq(rax, xmm3);
  __ movq(xmm7, xmm3);
  __ addsd(xmm3, xmm6); // add/subtract Shifter: round xH*L2EH to an integer
  __ mulsd(xmm1, xmm4);
  __ movsd(xmm4, ExternalAddress(ONEMASK), r11 /*rscratch*/);
  __ subsd(xmm3, xmm6);
  __ xorpd(xmm0, xmm0);
  __ addsd(xmm2, xmm1);
  __ subsd(xmm7, xmm3);
  __ movdqu(xmm6, ExternalAddress(cv), r11 /*rscratch*/);
  __ addsd(xmm2, xmm7);
  // Low 8 bits of rax select a 16-byte {Tn, Dn} entry of T2_neg_f.
  __ movl(rcx, 255);
  __ andl(rcx, rax);
  __ addl(rcx, rcx);
  __ lea(r8, ExternalAddress(T2_neg_f));
  __ movdqu(xmm5, Address(r8, rcx, Address::times(8)));
  // Remaining bits of rax give k; build the top word of 2^{-k} as
  // 0x3FF0 - (k << 4) and scale the table entry by it: xmm0 <- {T0, D0}.
  __ shrl(rax, 4);
  __ andl(rax, 65520);
  __ subl(rax, 16368);
  __ negl(rax);
  __ pinsrw(xmm0, rax, 3);
  __ movdqu(xmm1, ExternalAddress(cv + 16), r11 /*rscratch*/);
  __ pshufd(xmm0, xmm0, 68);
  __ mulpd(xmm0, xmm5);
  // Polynomial in r and the reciprocal/correction steps described in the
  // algorithm description. The instruction order is kept exactly as in the
  // LIBM-derived sequence.
  __ movsd(xmm7, ExternalAddress(cv + 32), r11 /*rscratch*/);
  __ pshufd(xmm2, xmm2, 68);
  __ movq(xmm5, xmm4);
  __ addsd(xmm4, xmm0);
  __ mulpd(xmm6, xmm2);
  __ mulsd(xmm7, xmm2);
  __ mulpd(xmm2, xmm2);
  __ addpd(xmm1, xmm6);
  __ mulsd(xmm2, xmm2);
  __ movsd(xmm3, ExternalAddress(ONEMASK), r11 /*rscratch*/);
  __ mulpd(xmm1, xmm2);
  __ pshufd(xmm6, xmm1, 78);
  __ addsd(xmm1, xmm6);
  __ movq(xmm6, xmm1);
  __ addsd(xmm1, xmm7);
  __ mulsd(xmm1, xmm0);
  __ addsd(xmm1, xmm4);
  __ andpd(xmm4, ExternalAddress(MASK3), r11 /*rscratch*/);
  __ divsd(xmm5, xmm1); // xmm5 <- 1.0 / xmm1 (ONEMASK was copied into xmm5 above)
  __ subsd(xmm3, xmm4);
  __ pshufd(xmm1, xmm0, 238);
  __ addsd(xmm3, xmm0);
  __ movq(xmm2, xmm4);
  __ addsd(xmm3, xmm1);
  __ mulsd(xmm1, xmm7);
  __ mulsd(xmm7, xmm0);
  __ addsd(xmm3, xmm1);
  __ addsd(xmm4, xmm7);
  __ movsd(xmm1, ExternalAddress(RMASK), r11 /*rscratch*/);
  __ mulsd(xmm6, xmm0);
  __ andpd(xmm4, ExternalAddress(MASK3), r11 /*rscratch*/);
  __ addsd(xmm3, xmm6);
  __ movq(xmm6, xmm4);
  __ subsd(xmm2, xmm4);
  __ addsd(xmm2, xmm7);
  __ movsd(xmm7, ExternalAddress(ONEMASK), r11 /*rscratch*/);
  __ andpd(xmm5, xmm1); // truncate the reciprocal with RMASK
  __ addsd(xmm3, xmm2);
  __ mulsd(xmm4, xmm5);
  __ xorpd(xmm2, xmm2);
  __ mulsd(xmm3, xmm5);
  __ subsd(xmm6, ExternalAddress(TWOMASK), r11 /*rscratch*/);
  __ subsd(xmm4, xmm7);
  // xmm2 <- inverted sign bit of x; it is XORed into the result below
  // ("with the correct sign set at the end" in the algorithm description).
  __ xorl(rdx, 32768);
  __ pinsrw(xmm2, rdx, 3);
  __ addsd(xmm4, xmm3);
  __ mulsd(xmm6, xmm5);
  __ movq(xmm1, xmm3);
  __ mulsd(xmm3, xmm4);
  __ movq(xmm0, xmm6);
  __ mulsd(xmm6, xmm4);
  __ subsd(xmm1, xmm3);
  __ subsd(xmm1, xmm6);
  __ addsd(xmm0, xmm1);
  __ xorpd(xmm0, xmm2);
  __ jmp(B1_4);

  // ---- |x| < 2^-4 ---------------------------------------------------------
  __ bind(L_2TAG_PACKET_0_0_1);
  // rcx <- exponent word - 15344, so that [2^-64, 2^-4) maps to [0, 960).
  // Exponent words >= 16438 never reach this point.
  __ addl(rcx, 960);
  __ cmpl(rcx, 1094);
  __ jcc(Assembler::aboveEqual, L_2TAG_PACKET_1_0_1); // Branch only if |x| not in [2^{-64}, 2^{-4})
  // Taylor series: xmm0 <- x + x * q(x^2), with the coefficients taken from pv.
  __ movdqu(xmm2, ExternalAddress(pv), r11 /*rscratch*/);
  __ pshufd(xmm1, xmm0, 68);
  __ movdqu(xmm3, ExternalAddress(pv + 16), r11 /*rscratch*/);
  __ mulpd(xmm1, xmm1);
  __ movdqu(xmm4, ExternalAddress(pv + 32), r11 /*rscratch*/);
  __ mulpd(xmm2, xmm1);
  __ pshufd(xmm5, xmm1, 68);
  __ addpd(xmm2, xmm3);
  __ mulsd(xmm5, xmm5);
  __ mulpd(xmm2, xmm1);
  __ mulsd(xmm5, xmm5);
  __ addpd(xmm2, xmm4);
  __ mulpd(xmm2, xmm5);
  __ pshufd(xmm5, xmm2, 238);
  __ addsd(xmm2, xmm5);
  __ mulsd(xmm2, xmm0);
  __ addsd(xmm0, xmm2);
  __ jmp(B1_4);

  // ---- |x| < 2^-64: the result is x itself (xmm0 is left untouched) -------
  __ bind(L_2TAG_PACKET_1_0_1);
  // rcx still holds (exponent word - 15344), which has wrapped around on
  // this path. Restore the raw exponent word first; otherwise the unsigned
  // compare against 16 can never succeed and the denormal handler below is
  // unreachable.
  __ addl(rcx, 15344);
  __ cmpl(rcx, 16);
  __ jcc(Assembler::below, L_2TAG_PACKET_3_0_1); // Branch only if |x| is denormalized
  // xmm2 <- 2^64 * x + x. The value is discarded; presumably it is computed
  // only for its effect on the FP status flags -- confirm against LIBM.
  __ xorpd(xmm2, xmm2);
  __ movl(rax, 17392);
  __ pinsrw(xmm2, rax, 3);
  __ mulsd(xmm2, xmm0);
  __ addsd(xmm2, xmm0);
  __ jmp(B1_4);

  // Biased exponent field is zero (zero or denormal x): xmm2 <- x * x, value
  // discarded (presumably for the FP status flags only); the result is x.
  __ bind(L_2TAG_PACKET_3_0_1);
  __ movq(xmm2, xmm0);
  __ mulsd(xmm2, xmm2);
  __ jmp(B1_4);

  // ---- |x| >= 22, INF or NaN (rcx holds the raw exponent word here) -------
  __ bind(L_2TAG_PACKET_2_0_1);
  __ cmpl(rcx, 32752);
  __ jcc(Assembler::aboveEqual, L_2TAG_PACKET_4_0_1); // Branch only if |x| is INF or NaN
  // xmm2 <- t * t + t with t = 2^-64; the value is discarded (presumably for
  // the FP status flags only).
  __ xorpd(xmm2, xmm2);
  __ movl(rcx, 15344);
  __ pinsrw(xmm2, rcx, 3);
  __ movq(xmm3, xmm2);
  __ mulsd(xmm2, xmm2);
  __ addsd(xmm2, xmm3);

  // Result is +/-1.0: top word = sign bit of x | 0x3FF0, all other bits zero.
  __ bind(L_2TAG_PACKET_5_0_1);
  __ xorpd(xmm0, xmm0);
  __ orl(rdx, 16368);
  __ pinsrw(xmm0, rdx, 3);
  __ jmp(B1_4);

  // Exponent field is all ones: separate INF from NaN by OR-ing together the
  // low 32 bits of x and bits 20..51 of x, which covers the whole mantissa.
  __ bind(L_2TAG_PACKET_4_0_1);
  __ movq(xmm2, xmm0);
  __ movdl(rax, xmm0);
  __ psrlq(xmm2, 20);
  __ movdl(rcx, xmm2);
  __ orl(rcx, rax);
  __ cmpl(rcx, 0);
  __ jcc(Assembler::equal, L_2TAG_PACKET_5_0_1); // Branch only if |x| is not NaN
  __ addsd(xmm0, xmm0); // NaN + NaN: returns a quiet NaN

  __ bind(B1_4);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  // record the stub entry and end
  store_archive_data(stub_id, start, __ pc());

  return start;
}

#undef __
#if INCLUDE_CDS
// Appends to external_addresses every constant-pool address that the tanh
// stub references, including the interior offsets (base + 8/16/32) that the
// stub addresses directly. The append order is the same as before.
void StubGenerator::init_AOTAddressTable_tanh(GrowableArray<address>& external_addresses) {
  // Byte-granular base pointers for the tables that are also addressed at
  // interior offsets.
  const address l2e_base = (address)_L2E;
  const address cv_base  = (address)_cv;
  const address pv_base  = (address)_pv;

  const address tanh_constants[] = {
    l2e_base,
    l2e_base + 8,
    (address)_HALFMASK,
    (address)_ONEMASK,
    (address)_TWOMASK,
    (address)_Shifter,
    cv_base,
    cv_base + 16,
    cv_base + 32,
    (address)_T2_neg_f,
    pv_base,
    pv_base + 16,
    pv_base + 32,
    (address)_MASK3,
    (address)_RMASK
  };

  for (address entry : tanh_constants) {
    external_addresses.append(entry);
  }
}
#endif // INCLUDE_CDS