From 8e72d7cf8e7dfc7eb9e66bc562f125f947e37f49 Mon Sep 17 00:00:00 2001 From: Scott Gibbons Date: Fri, 7 Jun 2024 17:02:14 +0000 Subject: [PATCH] 8320448: Accelerate IndexOf using AVX2 Reviewed-by: epeter, kvn, sviswanathan --- src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp | 105 +- src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp | 7 +- .../x86/c2_stubGenerator_x86_64_string.cpp | 1837 +++++++++++++++++ src/hotspot/cpu/x86/stubGenerator_x86_64.cpp | 6 + src/hotspot/cpu/x86/stubGenerator_x86_64.hpp | 3 + src/hotspot/cpu/x86/stubRoutines_x86.hpp | 2 +- src/hotspot/share/opto/escape.cpp | 1 + src/hotspot/share/opto/library_call.cpp | 32 +- src/hotspot/share/opto/runtime.cpp | 21 + src/hotspot/share/opto/runtime.hpp | 1 + src/hotspot/share/runtime/stubRoutines.cpp | 2 + src/hotspot/share/runtime/stubRoutines.hpp | 2 + test/jdk/TEST.ROOT | 5 +- test/jdk/java/lang/String/IndexOf.java | 258 +++ .../java/lang/StringBuffer/ECoreIndexOf.java | 1381 +++++++++++++ .../bench/java/lang/StringIndexOfHuge.java | 273 +++ 16 files changed, 3906 insertions(+), 30 deletions(-) create mode 100644 src/hotspot/cpu/x86/c2_stubGenerator_x86_64_string.cpp create mode 100644 test/jdk/java/lang/String/IndexOf.java create mode 100644 test/jdk/java/lang/StringBuffer/ECoreIndexOf.java create mode 100644 test/micro/org/openjdk/bench/java/lang/StringIndexOfHuge.java diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index 50f957aef99..bec63210df9 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -4491,13 +4491,21 @@ void C2_MacroAssembler::count_positives(Register ary1, Register len, // Compare char[] or byte[] arrays aligned to 4 bytes or substrings. void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register ary2, Register limit, Register result, Register chr, - XMMRegister vec1, XMMRegister vec2, bool is_char, KRegister mask) { + XMMRegister vec1, XMMRegister vec2, bool is_char, + KRegister mask, bool expand_ary2) { + // for expand_ary2, limit is the (smaller) size of the second array. ShortBranchVerifier sbv(this); Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR, COMPARE_BYTE; + assert((!expand_ary2) || ((expand_ary2) && (UseAVX == 2)), + "Expansion only implemented for AVX2"); + int length_offset = arrayOopDesc::length_offset_in_bytes(); int base_offset = arrayOopDesc::base_offset_in_bytes(is_char ? T_CHAR : T_BYTE); + Address::ScaleFactor scaleFactor = expand_ary2 ? Address::times_2 : Address::times_1; + int scaleIncr = expand_ary2 ? 8 : 16; + if (is_array_equ) { // Check the input args cmpoop(ary1, ary2); @@ -4533,14 +4541,20 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register if (UseAVX >= 2) { // With AVX2, use 32-byte vector compare - Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; + Label COMPARE_WIDE_VECTORS, COMPARE_WIDE_VECTORS_16, COMPARE_TAIL, COMPARE_TAIL_16; // Compare 32-byte vectors - andl(result, 0x0000001f); // tail count (in bytes) - andl(limit, 0xffffffe0); // vector count (in bytes) - jcc(Assembler::zero, COMPARE_TAIL); + if (expand_ary2) { + andl(result, 0x0000000f); // tail count (in bytes) + andl(limit, 0xfffffff0); // vector count (in bytes) + jcc(Assembler::zero, COMPARE_TAIL); + } else { + andl(result, 0x0000001f); // tail count (in bytes) + andl(limit, 0xffffffe0); // vector count (in bytes) + jcc(Assembler::zero, COMPARE_TAIL_16); + } - lea(ary1, Address(ary1, limit, Address::times_1)); + lea(ary1, Address(ary1, limit, scaleFactor)); lea(ary2, Address(ary2, limit, Address::times_1)); negptr(limit); @@ -4583,25 +4597,59 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register }//if (VM_Version::supports_avx512vlbw()) #endif //_LP64 bind(COMPARE_WIDE_VECTORS); - vmovdqu(vec1, Address(ary1, limit, Address::times_1)); - vmovdqu(vec2, Address(ary2, limit, Address::times_1)); + vmovdqu(vec1, Address(ary1, limit, scaleFactor)); + if (expand_ary2) { + vpmovzxbw(vec2, Address(ary2, limit, Address::times_1), Assembler::AVX_256bit); + } else { + vmovdqu(vec2, Address(ary2, limit, Address::times_1)); + } vpxor(vec1, vec2); vptest(vec1, vec1); jcc(Assembler::notZero, FALSE_LABEL); - addptr(limit, 32); + addptr(limit, scaleIncr * 2); jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); testl(result, result); jcc(Assembler::zero, TRUE_LABEL); - vmovdqu(vec1, Address(ary1, result, Address::times_1, -32)); - vmovdqu(vec2, Address(ary2, result, Address::times_1, -32)); + vmovdqu(vec1, Address(ary1, result, scaleFactor, -32)); + if (expand_ary2) { + vpmovzxbw(vec2, Address(ary2, result, Address::times_1, -16), Assembler::AVX_256bit); + } else { + vmovdqu(vec2, Address(ary2, result, Address::times_1, -32)); + } vpxor(vec1, vec2); vptest(vec1, vec1); - jccb(Assembler::notZero, FALSE_LABEL); - jmpb(TRUE_LABEL); + jcc(Assembler::notZero, FALSE_LABEL); + jmp(TRUE_LABEL); + + bind(COMPARE_TAIL_16); // limit is zero + movl(limit, result); + + // Compare 16-byte chunks + andl(result, 0x0000000f); // tail count (in bytes) + andl(limit, 0xfffffff0); // vector count (in bytes) + jcc(Assembler::zero, COMPARE_TAIL); + + lea(ary1, Address(ary1, limit, scaleFactor)); + lea(ary2, Address(ary2, limit, Address::times_1)); + negptr(limit); + + bind(COMPARE_WIDE_VECTORS_16); + movdqu(vec1, Address(ary1, limit, scaleFactor)); + if (expand_ary2) { + vpmovzxbw(vec2, Address(ary2, limit, Address::times_1), Assembler::AVX_128bit); + } else { + movdqu(vec2, Address(ary2, limit, Address::times_1)); + } + pxor(vec1, vec2); + + ptest(vec1, vec1); + jcc(Assembler::notZero, FALSE_LABEL); + addptr(limit, scaleIncr); + jcc(Assembler::notZero, COMPARE_WIDE_VECTORS_16); bind(COMPARE_TAIL); // limit is zero movl(limit, result); @@ -4646,19 +4694,34 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register } // Compare 4-byte vectors - andl(limit, 0xfffffffc); // vector count (in bytes) - jccb(Assembler::zero, COMPARE_CHAR); + if (expand_ary2) { + testl(result, result); + jccb(Assembler::zero, TRUE_LABEL); + } else { + andl(limit, 0xfffffffc); // vector count (in bytes) + jccb(Assembler::zero, COMPARE_CHAR); + } - lea(ary1, Address(ary1, limit, Address::times_1)); + lea(ary1, Address(ary1, limit, scaleFactor)); lea(ary2, Address(ary2, limit, Address::times_1)); negptr(limit); bind(COMPARE_VECTORS); - movl(chr, Address(ary1, limit, Address::times_1)); - cmpl(chr, Address(ary2, limit, Address::times_1)); - jccb(Assembler::notEqual, FALSE_LABEL); - addptr(limit, 4); - jcc(Assembler::notZero, COMPARE_VECTORS); + if (expand_ary2) { + // There are no "vector" operations for bytes to shorts + movzbl(chr, Address(ary2, limit, Address::times_1)); + cmpw(Address(ary1, limit, Address::times_2), chr); + jccb(Assembler::notEqual, FALSE_LABEL); + addptr(limit, 1); + jcc(Assembler::notZero, COMPARE_VECTORS); + jmp(TRUE_LABEL); + } else { + movl(chr, Address(ary1, limit, Address::times_1)); + cmpl(chr, Address(ary2, limit, Address::times_1)); + jccb(Assembler::notEqual, FALSE_LABEL); + addptr(limit, 4); + jcc(Assembler::notZero, COMPARE_VECTORS); + } // Compare trailing char (final 2 bytes), if any bind(COMPARE_CHAR); diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp index 8c22990892b..676382225c2 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp @@ -289,10 +289,11 @@ public: void count_positives(Register ary1, Register len, Register result, Register tmp1, XMMRegister vec1, XMMRegister vec2, KRegister mask1 = knoreg, KRegister mask2 = knoreg); + // Compare char[] or byte[] arrays. - void arrays_equals(bool is_array_equ, Register ary1, Register ary2, - Register limit, Register result, Register chr, - XMMRegister vec1, XMMRegister vec2, bool is_char, KRegister mask = knoreg); + void arrays_equals(bool is_array_equ, Register ary1, Register ary2, Register limit, + Register result, Register chr, XMMRegister vec1, XMMRegister vec2, + bool is_char, KRegister mask = knoreg, bool expand_ary2 = false); void arrays_hashcode(Register str1, Register cnt1, Register result, Register tmp1, Register tmp2, Register tmp3, XMMRegister vnext, diff --git a/src/hotspot/cpu/x86/c2_stubGenerator_x86_64_string.cpp b/src/hotspot/cpu/x86/c2_stubGenerator_x86_64_string.cpp new file mode 100644 index 00000000000..34f8bec8d11 --- /dev/null +++ b/src/hotspot/cpu/x86/c2_stubGenerator_x86_64_string.cpp @@ -0,0 +1,1837 @@ +/* + * Copyright (c) 2024, Intel Corporation. All rights reserved. + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "macroAssembler_x86.hpp" +#include "stubGenerator_x86_64.hpp" +#include "opto/c2_MacroAssembler.hpp" +#include "opto/intrinsicnode.hpp" + +/******************************************************************************/ +// String handling intrinsics +// -------------------------- +// +// Currently implements scheme described in http://0x80.pl/articles/simd-strfind.html +// Implementation can be found at https://github.com/WojciechMula/sse4-strstr +// +// The general idea is as follows: +// 1. Broadcast the first byte of the needle to a ymm register (32 bytes) +// 2. Broadcast the last byte of the needle to a different ymm register +// 3. Compare the first-byte ymm register to the first 32 bytes of the haystack +// 4. Compare the last-byte register to the 32 bytes of the haystack at the (k-1)st position +// where k is the length of the needle +// 5. Logically AND the results of the comparison +// +// The result of the AND yields the position within the haystack where both the first +// and last bytes of the needle exist in their correct relative positions. Check the full +// needle value against the haystack to confirm a match. +// +// This implementation uses memcmp to compare when the size of the needle is >= 32 bytes. +// For other needle sizes, the comparison is done with register compares to eliminate the +// overhead of the call (including range checks, etc.). The size of the comparison is +// known, and it is also known to be safe reading the haystack for the full width of the needle. +// +// The original algorithm as implemented will potentially read past the end of the haystack. +// This implementation protects against that. Instead of reading as many 32-byte chunks as +// possible and then handling the tail, we calculate the last position of a vaild 32-byte +// read and adjust the starting position of the second read such that the last read will not +// go beyond the end of the haystack. So the first comparison is to the first 32 bytes of the +// haystack, and the second is offset by an amount to make the last read legal. The remainder of +// the comparisons are done incrementing by 32 bytes. +// +// This will cause 16 bytes on average to be examined twice, but that is cheaper than the +// logic required for tail processing. +// +/******************************************************************************/ + +#define __ _masm-> +#define __C2 ((C2_MacroAssembler *) _masm)-> + +// Register definitions for consistency +// These registers can be counted on to always contain +// the correct values (once set up) +#define XMM_BYTE_0 xmm0 +#define XMM_BYTE_K xmm1 +#define XMM_BYTE_1 xmm12 +#define save_r12 xmm4 +#define save_r13 xmm5 +#define save_r14 xmm6 +#define save_r15 xmm7 +#define save_rbx xmm8 +#define nMinusK r10 + +// Global temporary xmm registers +#define XMM_TMP1 xmm15 +#define XMM_TMP2 xmm14 +#define XMM_TMP3 xmm2 +#define XMM_TMP4 xmm3 + +// This macro handles clearing the bits of the mask register depending +// on whether we're comparing bytes or words. +#define CLEAR_BIT(mask) \ + if (isU) { \ + __ blsrl(mask, mask); \ + __ blsrl(mask, mask); \ + } else { \ + __ blsrl(mask, mask); \ + } + +#define NUMBER_OF_CASES 10 + +#undef STACK_SPACE +#undef MAX_NEEDLE_LEN_TO_EXPAND +#define MAX_NEEDLE_LEN_TO_EXPAND 0x28 + +// Stack layout: +# define COPIED_HAYSTACK_STACK_OFFSET (0x0) // MUST BE ZERO! +# define COPIED_HAYSTACK_STACK_SIZE (64) // MUST BE 64! + +# define EXPANDED_NEEDLE_STACK_OFFSET (COPIED_HAYSTACK_STACK_OFFSET + COPIED_HAYSTACK_STACK_SIZE) +# define EXPANDED_NEEDLE_STACK_SIZE (MAX_NEEDLE_LEN_TO_EXPAND * 2 + 32) + +# define SAVED_HAYSTACK_STACK_OFFSET (EXPANDED_NEEDLE_STACK_OFFSET + EXPANDED_NEEDLE_STACK_SIZE) +# define SAVED_HAYSTACK_STACK_SIZE (8) + +# define SAVED_INCREMENT_STACK_OFFSET (SAVED_HAYSTACK_STACK_OFFSET + SAVED_HAYSTACK_STACK_SIZE) +# define SAVED_INCREMENT_STACK_SIZE (8) + +# define SAVED_TERM_ADDR_STACK_OFFSET (SAVED_INCREMENT_STACK_OFFSET + SAVED_INCREMENT_STACK_SIZE) +# define SAVED_TERM_ADDR_STACK_SIZE (8) + +# define STACK_SPACE \ + (COPIED_HAYSTACK_STACK_SIZE + EXPANDED_NEEDLE_STACK_SIZE + SAVED_HAYSTACK_STACK_SIZE + SAVED_INCREMENT_STACK_SIZE \ + + SAVED_TERM_ADDR_STACK_SIZE) + +// Forward declarations for helper functions +static void broadcast_additional_needles(bool sizeKnown, int size, Register needle, + Register needleLen, Register rTmp, + StrIntrinsicNode::ArgEncoding ae, MacroAssembler *_masm); + +static void broadcast_first_and_last_needle(Register needle, Register needle_len, Register rTmp, + StrIntrinsicNode::ArgEncoding ae, + MacroAssembler *_masm); + +static void compare_big_haystack_to_needle(bool sizeKnown, int size, Label &noMatch, + Register haystack, Register needleLen, Register eq_mask, + XMMRegister rxTmp1, XMMRegister rxTmp2, + XMMRegister rxTmp3, StrIntrinsicNode::ArgEncoding ae, + MacroAssembler *_masm); + +static void compare_haystack_to_needle(bool sizeKnown, int size, Label &noMatch, Register haystack, + Register eq_mask, Register needleLen, Register rTmp, + XMMRegister rxTmp1, XMMRegister rxTmp2, + StrIntrinsicNode::ArgEncoding ae, MacroAssembler *_masm); + +static void big_case_loop_helper(bool sizeKnown, int size, Label &noMatch, Label &loop_top, + Register eq_mask, Register hsPtrRet, Register needleLen, + Register needle, Register haystack, Register hsLength, + Register rTmp1, Register rTmp2, Register rTmp3, Register rTmp4, + StrIntrinsicNode::ArgEncoding ae, MacroAssembler *_masm); + +static void byte_compare_helper(int size, Label &L_noMatch, Label &L_matchFound, Register needle, + Register needleVal, Register haystack, Register mask, + Register foundIndex, Register tmp, StrIntrinsicNode::ArgEncoding ae, + MacroAssembler *_masm); + +static void highly_optimized_short_cases(StrIntrinsicNode::ArgEncoding ae, Register haystack, + Register haystack_len, Register needle, + Register needle_len, XMMRegister XMM0, XMMRegister XMM1, + Register mask, Register tmp, MacroAssembler *_masm); + +static void setup_jump_tables(StrIntrinsicNode::ArgEncoding ae, Label &L_error, Label &L_checkRange, + Label &L_fixup, address *big_jump_table, address *small_jump_table, + MacroAssembler *_masm); + +static void vpcmpeq(XMMRegister dst, XMMRegister src, Address adr, int vector_len, + StrIntrinsicNode::ArgEncoding ae, MacroAssembler *_masm) { + if ((ae == StrIntrinsicNode::UL) || (ae == StrIntrinsicNode::UU)) { + __ vpcmpeqw(dst, src, adr, vector_len); + } else { + __ vpcmpeqb(dst, src, adr, vector_len); + } +} + +static void generate_string_indexof_stubs(StubGenerator *stubgen, address *fnptrs, + StrIntrinsicNode::ArgEncoding ae, MacroAssembler *_masm); + +//////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////// +// Start of generator +//////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////// + +void StubGenerator::generate_string_indexof(address *fnptrs) { + assert((int) StrIntrinsicNode::LL < 4, "Enum out of range"); + assert((int) StrIntrinsicNode::UL < 4, "Enum out of range"); + assert((int) StrIntrinsicNode::UU < 4, "Enum out of range"); + generate_string_indexof_stubs(this, fnptrs, StrIntrinsicNode::LL, _masm); + generate_string_indexof_stubs(this, fnptrs, StrIntrinsicNode::UU, _masm); + generate_string_indexof_stubs(this, fnptrs, StrIntrinsicNode::UL, _masm); + assert(fnptrs[StrIntrinsicNode::LL] != nullptr, "LL not generated."); + assert(fnptrs[StrIntrinsicNode::UL] != nullptr, "UL not generated."); + assert(fnptrs[StrIntrinsicNode::UU] != nullptr, "UU not generated."); +} + +static void generate_string_indexof_stubs(StubGenerator *stubgen, address *fnptrs, + StrIntrinsicNode::ArgEncoding ae, MacroAssembler *_masm) { + StubCodeMark mark(stubgen, "StubRoutines", "stringIndexOf"); + bool isLL = (ae == StrIntrinsicNode::LL); + bool isUL = (ae == StrIntrinsicNode::UL); + bool isUU = (ae == StrIntrinsicNode::UU); + bool isU = isUL || isUU; // At least one is UTF-16 + assert(isLL || isUL || isUU, "Encoding not recognized"); + + // Keep track of isUL since we need to generate UU code in the main body + // for the case where we expand the needle from bytes to words on the stack. + // This is done at L_wcharBegin. The algorithm used is: + // If the encoding is UL and the needle size is <= MAX_NEEDLE_LEN_TO_EXPAND, + // allocate space on the stack and expand the Latin-1 encoded needle. Then + // effectively "recurse" into the mainline using UU encoding (since both the + // haystack and needle are now UTF-16 encoded). + bool isReallyUL = isUL; + + //////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////// + // AVX2 code + //////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////// + assert(VM_Version::supports_avx2(), "Needs AVX2"); + + //////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////// + // Code generation explanation: + // + // The generator will generate code for three cases: + // 1. Both needle and haystack are Latin-1 (single byte) encoded (LL) + // 2. Both the needle and haystack are UTF-16 encoded (two bytes per character) (UU) + // 3. The haystack is UTF-16 encoded and the needle is Latin-1 encoded (UL) + // + // The case of the haystack being Latin-1 and the needle being UTF-16 is short-circuited + // so that we never get called in this case. + // + // For the UL case (haystack UTF-16 and needle Latin-1), the needle will be expanded + // onto the stack (for size <= MAX_NEEDLE_LEN_TO_EXPAND) and the UU code will do the work. + // For UL where the needle size is > MAX_NEEDLE_LEN_TO_EXPAND and the haystack size minus + // the needle size is less than 32 bytes, we default to a + // byte-by-byte comparison (this will be rare). + // + // Note that the code assumes MAX_NEEDLE_LEN_TO_EXPAND is >= 32. + // + // The UU and LL cases are identical except for the loop increments and loading + // of the characters into registers. UU loads and compares words, LL - bytes. + //////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////// + + const Register haystack_p = c_rarg0; + const Register haystack_len_p = c_rarg1; + const Register needle_p = c_rarg2; + const Register needle_len_p = c_rarg3; + + // Addresses of the two jump tables used for small needle processing + address big_jump_table; + address small_jump_table; + + Label L_begin; + + Label L_returnError, L_bigCaseFixupAndReturn; + Label L_bigSwitchTop, L_bigCaseDefault, L_smallCaseDefault; + Label L_nextCheck, L_checksPassed, L_return; + Label L_wcharBegin, L_continue, L_wideNoExpand, L_returnR11; + + __ align(CodeEntryAlignment); + fnptrs[ae] = __ pc(); + __ enter(); // required for proper stackwalking of RuntimeStub frame + + // Check for trivial cases + // needle length == 0? + __ cmpq(needle_len_p, 0); + __ jg_b(L_nextCheck); + __ xorl(rax, rax); + __ leave(); + __ ret(0); + + __ bind(L_nextCheck); + // haystack length >= needle length? + __ movq(rax, haystack_len_p); + __ subq(rax, needle_len_p); + __ jge_b(L_checksPassed); + + __ movq(rax, -1); + __ leave(); + __ ret(0); + + __ bind(L_checksPassed); + + // Check for highly-optimized ability - haystack <= 32 bytes and needle <= 6 bytes + // haystack_len is in elements, not bytes, for UTF-16 + __ cmpq(haystack_len_p, isU ? 16 : 32); + __ ja(L_begin); + + // needle_len is in elements, not bytes, for UTF-16 <=> UTF-16 + __ cmpq(needle_len_p, isUU ? 3 : 6); + __ ja(L_begin); + + // Handle short haystack and needle specially + // Generated code does not return - either found or not + highly_optimized_short_cases(ae, haystack_p, haystack_len_p, needle_p, needle_len_p, xmm0, xmm1, + r10, r11, _masm); + + // If we're generating UL, we need to "pretend" we're generating UU code + // for the case where the needle can be expanded onto the stack + if (isReallyUL) { + ae = StrIntrinsicNode::UU; + isUL = false; + isUU = true; + } + + // Set up jump tables. Used when needle size <= NUMBER_OF_CASES + setup_jump_tables(ae, L_returnError, L_returnR11, L_bigCaseFixupAndReturn, &big_jump_table, + &small_jump_table, _masm); + + //////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////// + // + // The above code handles all cases (LL, UL, UU) for haystack size <= 32 bytes + // and needle size <= 6 bytes. + // + // Main processing proceeds as follows: + // Save state and setup stack, etc. + // If UL, jump to code to handle special-case UL situations (see L_wcharBegin below) + // Broadcast the first and last needle elements to XMM_BYTE_0 and XMM_BYTE_K, respectively + // If the length in bytes of the haystack is > 32, dispatch to the big switch handling code + // If the haystack length in bytes is <= 32: + // Copy the haystack to the stack. This is done to prevent possible page faults and + // allows for reading full 32-byte chunks of the haystack. + // Dispatch to the small switch handling code + // + // Here, "big switch" and "small switch" refers to the haystack size: > 32 bytes for big + // and <= 32 bytes for small. The switches implement optimized code for handling 1 to + // NUMBER_OF_CASES (currently 10) needle sizes for both big and small. There are special + // routines for handling needle sizes > NUMBER_OF_CASES (L_{big,small}CaseDefault). These + // cases use C2's arrays_equals() to compare the needle to the haystack. The small cases + // use specialized code for comparing the needle. + // + // The algorithm currently does vector comparisons for the first, last, and second bytes + // of the needle and, where each of these needle elements matches the correct position + // within the haystack, the "in-between" bytes are compared using the most efficient + // instructions possible for short needles, or C2's arrays_equals for longer needles. + + __ align(CodeEntryAlignment); + + __ bind(L_begin); + __ movdq(save_r12, r12); + __ movdq(save_r13, r13); + __ movdq(save_r14, r14); + __ movdq(save_r15, r15); + __ movdq(save_rbx, rbx); +#ifdef _WIN64 + __ push(rsi); + __ push(rdi); + + // Move to Linux-style ABI + __ movq(rdi, rcx); + __ movq(rsi, rdx); + __ movq(rdx, r8); + __ movq(rcx, r9); +#endif + + const Register haystack = rdi; + const Register haystack_len = rsi; + const Register needle = rdx; + const Register needle_len = rcx; + const Register save_ndl_len = r12; + + __ push(rbp); + __ subptr(rsp, STACK_SPACE); + + if (isReallyUL) { + // Branch out if doing UL + __ jmp(L_wcharBegin); + } + + if (!isReallyUL && isUU) { // Adjust sizes of hs and needle + // UU passes lengths in terms of chars - convert to bytes + __ shlq(needle_len, 1); + __ shlq(haystack_len, 1); + } + + // UL processing comes here after expanding needle + __ bind(L_continue); + // nMinusK (haystack length in bytes minus needle length in bytes) is used several + // places to determine whether a compare will read past the end of the haystack. + __ movq(nMinusK, haystack_len); + __ subq(nMinusK, needle_len); + + // Set up expected registers + __ movq(save_ndl_len, needle_len); + __ movq(r14, needle); + __ movq(rbx, haystack); + + // Always need needle broadcast to ymm registers (XMM_BYTE_0 and XMM_BYTE_K) + broadcast_first_and_last_needle(needle, needle_len, rax, ae, _masm); + + // Do "big switch" if haystack size > 32 + __ cmpq(haystack_len, 0x20); + __ ja_b(L_bigSwitchTop); + + // Copy the small (< 32 byte) haystack to the stack. Allows for vector reads without page fault + // Only done for small haystacks + // + // NOTE: This code assumes that the haystack points to a java array type AND there are + // at least 16 bytes of header preceeding the haystack pointer. + // + // This means that we're copying up to 15 bytes of the header onto the stack along + // with the haystack bytes. After the copy completes, we adjust the haystack pointer + // to the valid haystack bytes on the stack. + { + Label L_moreThan16, L_adjustHaystack; + + const Register index = rax; + const Register haystack = rbx; + + // Only a single vector load/store of either 16 or 32 bytes + __ cmpq(haystack_len, 0x10); + __ ja_b(L_moreThan16); + + __ movq(index, COPIED_HAYSTACK_STACK_OFFSET + 0x10); + __ movdqu(XMM_TMP1, Address(haystack, haystack_len, Address::times_1, -0x10)); + __ movdqu(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET), XMM_TMP1); + __ jmpb(L_adjustHaystack); + + __ bind(L_moreThan16); + __ movq(index, COPIED_HAYSTACK_STACK_OFFSET + 0x20); + __ vmovdqu(XMM_TMP1, Address(haystack, haystack_len, Address::times_1, -0x20)); + __ vmovdqu(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET), XMM_TMP1); + + // Point the haystack at the correct location of the first byte of the "real" haystack on the stack + __ bind(L_adjustHaystack); + __ subq(index, haystack_len); + __ leaq(haystack, Address(rsp, index, Address::times_1)); + } + + // Dispatch to handlers for small needle and small haystack + // Note that needle sizes of 1-6 have been handled in highly_optimized_short_cases, + // so the dispatch only has valid entries for 7-10. + __ leaq(r13, Address(save_ndl_len, -1)); + __ cmpq(r13, NUMBER_OF_CASES - 1); + __ ja(L_smallCaseDefault); + __ lea(r15, InternalAddress(small_jump_table)); + __ jmp(Address(r15, r13, Address::times_8)); + + // Dispatch to handlers for small needle and large haystack + // For large haystacks, the jump table is fully populated (1-10) + __ bind(L_bigSwitchTop); + __ leaq(rax, Address(save_ndl_len, -1)); + __ cmpq(rax, NUMBER_OF_CASES - 1); + __ ja(L_bigCaseDefault); + __ lea(r15, InternalAddress(big_jump_table)); + __ jmp(Address(r15, rax, Address::times_8)); + + //////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////// + // Fixup and return routines + + // Return not found + __ bind(L_returnError); + __ movq(rax, -1); + __ jmpb(L_return); + + // At this point, rcx has &haystack where match found, rbx has &haystack, + // and r8 has the index where a match was found + __ bind(L_bigCaseFixupAndReturn); + __ subq(rcx, rbx); + __ addq(rcx, r8); + + __ movq(r11, rcx); + + // r11 will contain the valid index. + __ bind(L_returnR11); + __ movq(rax, r11); + + // Restore stack, vzeroupper and return + __ bind(L_return); + __ addptr(rsp, STACK_SPACE); + __ pop(rbp); +#ifdef _WIN64 + __ pop(rdi); + __ pop(rsi); +#endif + __ movdq(r12, save_r12); + __ movdq(r13, save_r13); + __ movdq(r14, save_r14); + __ movdq(r15, save_r15); + __ movdq(rbx, save_rbx); + + // Need to return elements for UTF-16 encodings + if (isU) { + // Return value for UTF-16 is elements, not bytes + // sar is used to preserve -1 + __ sarq(rax, 1); + } + __ vzeroupper(); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + //////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////// + // + // Big case default: + // + // Handle needle sizes > 10 bytes. Uses C2's arrays_equals to compare the contents + // of the needle to the haystack. + + { + Label L_loopTop, L_innerLoop, L_found; + + const Register hsPtrRet = rax; + const Register mask = r8; + const Register index = r9; + const Register compLen = rbp; + const Register haystackStart = rcx; + const Register rScratch = r13; + const Register needleLen = r12; + const Register needle = r14; + const Register haystack = rbx; + const Register hsLength = rsi; + const Register tmp1 = rdi; + +// #define used for registers that are re-used in the code +#undef retval +#undef firstNeedleCompare +#undef tmp2 +#undef tmp3 +#define tmp2 r15 +#define tmp3 rdx + //////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////// + // + // Big case default: registers on entry + // + // rbx: haystack + // rcx: k + // rdx: junk + // rsi: n + // rdi: haystack + // r10: n - k + // r12: k + // r13: junk + // r14: needle + // rbp: junk + // XMM_BYTE_0 - first element of needle broadcast + // XMM_BYTE_K - last element of needle broadcast + // + // Set up in big_case_loop_helper + // XMM_BYTE_1 - second element of needle broadcast + + __ bind(L_bigCaseDefault); + + // Loop construct handling for big haystacks + // The helper binds L_loopTop which should be jumped to if potential matches fail to compare + // equal (thus moving on to the next chunk of haystack). If we run out of haystack, the + // helper jumps to L_returnError. + big_case_loop_helper(false, 0, L_returnError, L_loopTop, mask, hsPtrRet, needleLen, needle, + haystack, hsLength, tmp1, tmp2, tmp3, rScratch, ae, _masm); + + // big_case_loop_helper will fall through to this point if one or more potential matches are found + // The mask will have a bitmask indicating the position of the potential matches within the haystack + __ align(OptoLoopAlignment); + __ bind(L_innerLoop); + __ tzcntl(index, mask); + +// Re-use of r15 and rdx +#undef tmp2 +#undef tmp3 +#define retval r15 +#define firstNeedleCompare rdx + + // Need a lot of registers here to preserve state across arrays_equals call + + // Starting address in the haystack + __ leaq(haystackStart, Address(hsPtrRet, index, Address::times_1, isU ? 4 : 2)); + // Starting address of first byte of needle to compare + __ leaq(firstNeedleCompare, Address(needle, isU ? 4 : 2)); + // Number of bytes to compare + __ leaq(compLen, Address(needleLen, isU ? -6 : -3)); + + // Call arrays_equals for both UU and LL cases as bytes should compare exact + __C2 arrays_equals(false, haystackStart, firstNeedleCompare, compLen, retval, rScratch, + XMM_TMP3, XMM_TMP4, false /* char */, knoreg); + __ testl(retval, retval); + __ jne_b(L_found); + + // If more potential matches, continue at inner loop, otherwise go get another vector + CLEAR_BIT(mask); + __ jne(L_innerLoop); + __ jmp(L_loopTop); + + // Found exact match. Compute offset from beginning of haystack + __ bind(L_found); + __ subq(hsPtrRet, haystack); + __ addq(hsPtrRet, index); + __ movq(r11, hsPtrRet); + __ jmp(L_returnR11); + +#undef retval +#undef firstNeedleCompare + } + + //////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////// + // + // Small case default: + // + // Handle needle sizes > 10 bytes. Uses C2's arrays_equals to compare the contents + // of the needle to the haystack. + + //////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////// + // + // Small case default: register on entry + // + // rbx: haystack + // r14: needle + // r13: k - 1 + // r12: k + // r10: n - k + // rbp: junk + // rdi: junk + // rsi: n + // rdx: junk + // rcx: junk + // XMM_BYTE_0 - first element of needle broadcast + // XMM_BYTE_K - last element of needle broadcast + // + // Set up in broadcast_additional_needles + // XMM_BYTE_1 - second element of needle broadcast + // + // Haystack always copied to stack, so 32-byte reads OK + // Haystack length <= 32 + // 10 < needle length <= 32 + + { + __ bind(L_smallCaseDefault); + + Label L_innerLoop; + + const Register firstNeedleCompare = rdx; + const Register compLen = r9; + const Register haystack = rbx; + const Register mask = r8; + const Register rTmp = rdi; + const Register rTmp2 = r13; + const Register rTmp3 = rax; + +// r14 and r12 will be re-used later in this procedure +#undef needle +#define needle r14 +#undef needleLen +#define needleLen r12 + + broadcast_additional_needles(false, 0 /* unknown */, needle, needleLen, rTmp3, ae, _masm); + + // For small haystacks we already know that the 1st, 2nd, and last bytes of the needle + // compare equal, so we can reduce the byte count to arrays_equals + __ leaq(firstNeedleCompare, Address(needle, isU ? 4 : 2)); + __ leaq(compLen, Address(needleLen, isU ? -6 : -3)); + + // firstNeedleCompare has address of third element of needle + // compLen has length of comparison to do (3 elements less than needle size) + + // Helper to compare the 1st, 2nd, and last byte of the needle to the haystack + // in the correct position. Since the haystack is < 32 bytes, not finding matching + // needle bytes can just return failure. Otherwise, we loop through the found + // matches. + compare_haystack_to_needle(false, 0, L_returnError, haystack, mask, needleLen, rTmp3, XMM_TMP1, + XMM_TMP2, ae, _masm); + +// NOTE: REGISTER RE-USE for r12 and r14 +#undef needle +#undef saveCompLen +#define saveCompLen r14 +#undef needleLen +#undef saveNeedleAddress +#define saveNeedleAddress r12 + + // Save registers stomped by arrays_equals + __ movq(saveCompLen, compLen); + __ movq(saveNeedleAddress, firstNeedleCompare); // Save address of 2nd element of needle + + // Find index of a potential match + __ align(OptoLoopAlignment); + __ bind(L_innerLoop); + __ tzcntl(r11, mask); + + __ leaq(rTmp, Address(haystack, r11, Address::times_1, isU ? 4 : 2)); + + // Check for needle equality. Handles UU and LL cases since byte comparison should be exact + __C2 arrays_equals(false, rTmp, firstNeedleCompare, compLen, rTmp3, rTmp2, XMM_TMP3, XMM_TMP4, + false /* char */, knoreg); + __ testl(rTmp3, rTmp3); + __ jne(L_returnR11); + + // Restore saved registers + __ movq(compLen, saveCompLen); + __ movq(firstNeedleCompare, saveNeedleAddress); + + // Jump to inner loop if more matches to check, otherwise return not found + CLEAR_BIT(mask); + __ jne(L_innerLoop); + __ jmp(L_returnError); + +#undef saveCompLen +#undef saveNeedleAddress + } + + if (isReallyUL) { + //////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////// + // Wide char code + //////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////// + // + // Pseudo-code: + // + // If needle length less than MAX_NEEDLE_LEN_TO_EXPAND, read the needle + // bytes from r14 and write them as words onto the stack. Then go to the + // "regular" UU code. This is equavilent to doing a UU comparison, since the + // haystack will be in UTF-16. + // + // If the needle can't be expanded, process the same way as the default + // cases above. + __ bind(L_wcharBegin); + + // Restore argument encoding from UU back to UL for helpers + ae = StrIntrinsicNode::UL; + isUL = true; + isUU = false; + + Label L_top, L_finished; + + const Register haystack = rdi; + const Register hsLen = rsi; + const Register needle = rdx; + const Register nLen = rcx; + + const Register offset = rax; + const Register index = rbx; + const Register wr_index = r13; + + assert(MAX_NEEDLE_LEN_TO_EXPAND >= 32, "Small UL needles not supported"); + + // haystack length to bytes + __ shlq(hsLen, 1); + + // Ensure haystack >= needle + __ leaq(index, Address(nLen, nLen, Address::times_1)); + __ cmpq(index, hsLen); + __ jg(L_returnError); + + // Can't expand large-ish needles + __ cmpq(nLen, MAX_NEEDLE_LEN_TO_EXPAND); + __ ja(L_wideNoExpand); + + // + // Reads of existing needle are 16-byte chunks + // Writes to copied needle are 32-byte chunks + // Don't read past the end of the existing needle + // + // Start first read at [((ndlLen % 16) - 16) & 0xf] + // outndx += 32 + // inndx += 16 + // cmp nndx, ndlLen + // jae done + // + // Final index of start of needle at ((16 - (ndlLen %16)) & 0xf) << 1 + // + // Starting read for needle at -(16 - (nLen % 16)) + // Offset of needle in stack should be (16 - (nLen % 16)) * 2 + + __ movq(index, needle_len); + __ andq(index, 0xf); // nLen % 16 + __ movq(offset, 0x10); + __ subq(offset, index); // 16 - (nLen % 16) + __ movq(index, offset); + __ shlq(offset, 1); // * 2 + __ negq(index); // -(16 - (nLen % 16)) + __ xorq(wr_index, wr_index); + + __ bind(L_top); + // load needle and expand + __ vpmovzxbw(xmm0, Address(needle, index, Address::times_1), Assembler::AVX_256bit); + // store expanded needle to stack + __ vmovdqu(Address(rsp, wr_index, Address::times_1, EXPANDED_NEEDLE_STACK_OFFSET), xmm0); + __ addq(index, 0x10); + __ cmpq(index, needle_len); + __ jae(L_finished); + __ addq(wr_index, 32); + __ jmpb(L_top); + + // adjust pointer and length of needle + __ bind(L_finished); + __ leaq(needle, Address(rsp, offset, Address::times_1, EXPANDED_NEEDLE_STACK_OFFSET)); + __ leaq(needle_len, Address(needle_len, needle_len)); + + // Go handle this the same as UU + __ jmp(L_continue); + + //////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////// + // + // Compare Latin-1 encoded needle against UTF-16 encoded haystack. + // + // The needle is more than MAX_NEEDLE_LEN_TO_EXPAND bytes in length, and the haystack + // is at least as big. + + // Prepare for wchar anysize + __ bind(L_wideNoExpand); + + { + Label L_loopTop, L_temp, L_innerLoop, L_found, L_compareFull; + Label doCompare, topLoop; + + //////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////// + // On entry: + // + // rbx: haystack + // rcx: k + // rdx: junk + // rsi: n + // rdi: haystack + // r10: n - k + // r12: k + // r13: junk + // r14: needle + // rbp: junk + // XMM_BYTE_0 - first element of needle broadcast + // XMM_BYTE_K - last element of needle broadcast + + const Register hsPtrRet = rax; + const Register haystack = rbx; + const Register haystackStart = rcx; + const Register hsLength = rsi; + const Register tmp1 = rdi; + const Register compLen = rbp; + const Register mask = r8; + const Register index = r9; + const Register needleLen = r12; + const Register rScratch = r13; + const Register needle = r14; + + // Move registers into expected registers for rest of this routine + __ movq(rbx, rdi); + __ movq(r12, rcx); + __ movq(r14, rdx); + + // Set up nMinusK + __ movq(tmp1, needleLen); + __ shlq(tmp1, 1); + __ movq(rScratch, hsLength); + __ subq(rScratch, tmp1); + __ movq(nMinusK, rScratch); + + // Check for room for a 32-byte read for the last iteration + __ cmpq(nMinusK, 0x1f); + __ jl(L_compareFull); + + // Always need needle broadcast to ymm registers + broadcast_first_and_last_needle(needle, needleLen, tmp1, ae, _masm); + +// Register redefinition for rbx and r15 +#undef retval +#undef firstNeedleCompare +#undef tmp2 +#undef tmp3 +#define tmp2 r15 +#define tmp3 rdx + + // Loop construct handling for big haystacks + // The helper binds L_loopTop which should be jumped to if potential matches fail to compare + // equal (thus moving on to the next chunk of haystack). If we run out of haystack, the + // helper jumps to L_returnError. + big_case_loop_helper(false, 0, L_returnError, L_loopTop, mask, hsPtrRet, needleLen, needle, + haystack, hsLength, tmp1, tmp2, tmp3, rScratch, ae, _masm); + + // big_case_loop_helper will fall through to this point if one or more potential matches are + // found The mask will have a bitmask indicating the position of the potential matches within + // the haystack + __ align(OptoLoopAlignment); + __ bind(L_innerLoop); + __ tzcntl(index, mask); + +#undef tmp2 +#undef tmp3 +#define retval r15 +#define firstNeedleCompare rdx + + // Note that we're comparing the full needle here even though in some paths + // the 1st, 2nd, and last bytes are already known to be equal. This is necessary + // due to the handling of cases where nMinusK is < 32 + + // Need a lot of registers here to preserve state across arrays_equals call + + // Starting address in the haystack + __ leaq(haystackStart, Address(hsPtrRet, index)); + // Starting address of first byte of needle to compare + __ movq(firstNeedleCompare, needle); + // Number of bytes to compare + __ movq(compLen, needleLen); + + // Passing true as last parameter causes arrays_equals to expand the second array (needle) + // as the comparison is done. + __C2 arrays_equals(false, haystackStart, firstNeedleCompare, compLen, retval, rScratch, + XMM_TMP3, XMM_TMP4, false /* char */, knoreg, true /* expand_ary2 */); + __ testl(retval, retval); + __ jne_b(L_found); + + // If more potential matches, continue at inner loop, otherwise go get another vector + CLEAR_BIT(mask); + __ jne(L_innerLoop); + __ jmp(L_loopTop); + + // Found exact match. Compute offset from beginning of haystack + __ bind(L_found); + __ subq(hsPtrRet, haystack); + __ addq(hsPtrRet, index); + __ movq(r11, hsPtrRet); + __ jmp(L_returnR11); + +#undef retval +#undef firstNeedleCompare + + __ bind(L_compareFull); + + // rScratch has n - k. Compare entire string word-by-word + // Index returned in r11 + __ xorq(r11, r11); + __ movq(nMinusK, rScratch); + __ jmpb(doCompare); + + __ bind(topLoop); + __ addq(r11, 2); + __ cmpq(r11, nMinusK); + __ jg(L_returnError); + + __ bind(doCompare); + __ leaq(r9, Address(haystack, r11)); + __ leaq(r8, Address(needle, 0)); + __ movq(r13, needleLen); + + __C2 arrays_equals(false, r9, r8, r13, rax, rdx, XMM_TMP3, XMM_TMP4, false /* char */, knoreg, + true /* expand_ary2 */); + __ testq(rax, rax); + __ jz(topLoop); + + // Match found + __ jmp(L_returnR11); + } + } + + return; +} + +// Helper for broadcasting needle elements to ymm registers for compares +// Expands into XMM_BYTE_0 and XMM_BYTE_K +// +// For UTF-16 encoded needles, broadcast a word at the proper offset to the ymm +// register (case UU) +// For the UTF-16 encoded haystack with Latin1 encoded needle (case UL) we have +// to read into a temp register to zero-extend the single byte needle value, then +// broadcast words to the ymm register. +// +// Parameters: +// needle - the address of the first byte of the needle +// needle_len - length of needle if !sizeKnown +// rTmp - temp register (for UL only) +// ae - the argument encodings +// _masm - Current MacroAssembler instance pointer +// +// Modifies XMM_BYTE_0 and XMM_BYTE_K +static void broadcast_first_and_last_needle(Register needle, Register needle_len, Register rTmp, + StrIntrinsicNode::ArgEncoding ae, + MacroAssembler *_masm) { + bool isUL = (ae == StrIntrinsicNode::UL); + bool isUU = (ae == StrIntrinsicNode::UU); + bool isU = (isUU || isUL); + Label L_short; + + // Always need needle broadcast to ymm registers + // Broadcast the beginning of needle into a vector register. + if (isUU) { + __ vpbroadcastw(XMM_BYTE_0, Address(needle, 0), Assembler::AVX_256bit); + } else if (isUL) { + + __ movzbl(rTmp, Address(needle)); + __ movdl(XMM_BYTE_0, rTmp); + // 1st byte of needle in words + __ vpbroadcastw(XMM_BYTE_0, XMM_BYTE_0, Assembler::AVX_256bit); + } else { + __ vpbroadcastb(XMM_BYTE_0, Address(needle, 0), Assembler::AVX_256bit); + } + + // Broadcast the end of needle into a vector register. + // For a single-element needle this is redundant but does no harm and + // reduces code size as opposed to broadcasting only if used. + if (isUU) { + __ vpbroadcastw(XMM_BYTE_K, Address(needle, needle_len, Address::times_1, -2), + Assembler::AVX_256bit); + } else if (isUL) { + __ movzbl(rTmp, Address(needle, needle_len, Address::times_1, -1)); + __ movdl(XMM_BYTE_K, rTmp); + // last byte of needle in words + __ vpbroadcastw(XMM_BYTE_K, XMM_BYTE_K, Assembler::AVX_256bit); + } else { + __ vpbroadcastb(XMM_BYTE_K, Address(needle, needle_len, Address::times_1, -1), + Assembler::AVX_256bit); + } + + __ bind(L_short); +} + +// Helper for broadcasting the 2nd needle element to XMM_BYTE_1 +// +// For UTF-16 encoded needles, broadcast a word at the proper offset to the ymm +// register (case UU) +// For the UTF-16 encoded haystack with Latin1 encoded needle (case UL) we have +// to read into a temp register to zero-extend the single byte needle value, then +// broadcast words to the ymm register. +// +// Parameters: +// sizeKnown - True if needle size known at compile time +// size - the size of the needle. Pass 0 if unknown at compile time +// needle - the address of the first byte of the needle +// needleLen - length of needle if !sizeKnown +// rTmp - temp register (for UL only) +// ae - Argument encoding +// _masm - Current MacroAssembler instance pointer +// +// Modifies XMM_BYTE_1 +static void broadcast_additional_needles(bool sizeKnown, int size, Register needle, + Register needleLen, Register rTmp, + StrIntrinsicNode::ArgEncoding ae, MacroAssembler *_masm) { + Label L_done; + + assert_different_registers(needle, needleLen, rTmp); + + bool isUL = (ae == StrIntrinsicNode::UL); + bool isUU = (ae == StrIntrinsicNode::UU); + bool isU = (isUU || isUL); + + size = sizeKnown ? size : NUMBER_OF_CASES + 1; + + // Need code to determine whether it's valid to use second byte of + // needle if the size isn't known at compile-time + if (!sizeKnown) { + __ cmpq(needleLen, (isU ? 4 : 2)); + __ jl_b(L_done); + } + + if (size > (isU ? 4 : 2)) { + // Add compare for second byte + if (isUU) { + __ vpbroadcastw(XMM_BYTE_1, Address(needle, 2), Assembler::AVX_256bit); + } else if (isUL) { + __ movzbl(rTmp, Address(needle, 1)); + __ movdl(XMM_BYTE_1, rTmp); + // 1st byte of needle in words + __ vpbroadcastw(XMM_BYTE_1, XMM_BYTE_1, Assembler::AVX_256bit); + } else { + __ vpbroadcastb(XMM_BYTE_1, Address(needle, 1), Assembler::AVX_256bit); + } + } + + __ bind(L_done); +} + +// Helper for comparing needle elements to a big haystack +// +// This helper compares bytes or words in the ymm registers to +// the proper positions within the haystack. It will bail out early if +// no match found, otherwise it will progressively and together +// the comparison results, returning the answer at the end. +// +// On return, eq_mask will be set to the comparison mask value. If no match +// is found, this helper will jump to noMatch. +// +// Parameters: +// sizeKnown - True if size known at compile time +// size - the size of the needle in bytes. Pass 0 if unknown at compile time +// noMatch - label bound outside to jump to if there is no match +// haystack - the address of the first byte of the haystack +// hsLen - the sizeof the haystack in bytes +// needleLen - size of the needle in bytes known at runtime +// eq_mask - The bit mask returned that holds the result of the comparison +// rxTmp1 - a temporary xmm register +// rxTmp2 - a temporary xmm register +// rxTmp3 - a temporary xmm register +// ae - Argument encoding +// _masm - Current MacroAssembler instance pointer +// +// (n - k) will always be >= 32 on entry +static void compare_big_haystack_to_needle(bool sizeKnown, int size, Label &noMatch, + Register haystack, Register needleLen, Register eq_mask, + XMMRegister rxTmp1, XMMRegister rxTmp2, + XMMRegister rxTmp3, StrIntrinsicNode::ArgEncoding ae, + MacroAssembler *_masm) { + + assert_different_registers(eq_mask, haystack, needleLen, nMinusK); + + const XMMRegister result = rxTmp1; + const XMMRegister cmp_0 = rxTmp2; + const XMMRegister cmp_k = rxTmp3; + + bool isUL = (ae == StrIntrinsicNode::UL); + bool isUU = (ae == StrIntrinsicNode::UU); + bool isU = (isUU || isUL); + + int sizeIncr = isU ? 2 : 1; + + Label L_OKtoCompareFull, L_done, L_specialCase_gt2; + + assert(!sizeKnown || (sizeKnown && ((size > 0) && (size <= NUMBER_OF_CASES))), "Incorrect size given"); + + // Address of the kth byte of the needle within the haystack + Address kThByte = sizeKnown ? Address(haystack, size - sizeIncr) + : Address(haystack, needleLen, + isUL ? Address::times_2 : Address::times_1, -(sizeIncr)); + size = sizeKnown ? size : NUMBER_OF_CASES + 1; + + // Compare first byte of needle to haystack + vpcmpeq(cmp_0, XMM_BYTE_0, Address(haystack, 0), Assembler::AVX_256bit, ae, _masm); + + __ vpmovmskb(eq_mask, cmp_0, Assembler::AVX_256bit); + + // If the needle is a single element (at compile time) no need to compare more + if (size != sizeIncr) { + // Compare last byte of needle to haystack at proper position + vpcmpeq(cmp_k, XMM_BYTE_K, kThByte, Assembler::AVX_256bit, ae, _masm); + + __ vpand(result, cmp_k, cmp_0, Assembler::AVX_256bit); + + if (size > sizeIncr * 2) { + vpcmpeq(cmp_k, XMM_BYTE_1, Address(haystack, 1 * sizeIncr), Assembler::AVX_256bit, ae, _masm); + __ vpand(result, cmp_k, result, Assembler::AVX_256bit); + } + + __ vpmovmskb(eq_mask, result, Assembler::AVX_256bit); + } + + __ bind(L_done); + __ testl(eq_mask, eq_mask); + __ je(noMatch); + // At this point, we have at least one "match" where first and last bytes + // of the needle are found the correct distance apart. +} + +// Helper for comparing needle elements to a small haystack +// +// This helper compares bytes or words in the ymm registers to +// the proper positions within the haystack. It will bail out early if +// a match is not found, otherwise it will progressively and together +// the comparison results, returning the answer at the end. +// +// On return, eq_mask will be set to the comparison mask value. If no match +// is found, this helper will jump to noMatch. +// +// Parameters: +// sizeKnown - if true, size is valid and needleLen invalid. +// if false, size invalid and needleLen valid. +// size - the size of the needle. Pass 0 if unknown at compile time +// noMatch - label bound outside to jump to if there is no match +// haystack - the address of the first byte of the haystack +// eq_mask - The bit mask returned that holds the result of the comparison +// needleLen - Length of the needle in bytes. Only used if isUL true +// rTmp - temporary register +// rxTmp1 - temporary xmm register +// rxTmp2 - temporary xmm register +// ae - Argument encoding +// _masm - Current MacroAssembler instance pointer +// +// No need to worry about reading past end of haystack since haystack +// has been copied to the stack +// +// If !sizeKnown, needle is at least 11 bytes long +static void compare_haystack_to_needle(bool sizeKnown, int size, Label &noMatch, Register haystack, + Register eq_mask, Register needleLen, Register rTmp, + XMMRegister rxTmp1, XMMRegister rxTmp2, + StrIntrinsicNode::ArgEncoding ae, MacroAssembler *_masm) { + + assert_different_registers(eq_mask, haystack, needleLen, rTmp, nMinusK); + + // NOTE: cmp_0 and result are the same register + const XMMRegister cmp_0 = rxTmp1; + const XMMRegister result = rxTmp1; + const XMMRegister cmp_k = rxTmp2; + + bool isUL = (ae == StrIntrinsicNode::UL); + bool isUU = (ae == StrIntrinsicNode::UU); + bool isU = isUL || isUU; // At least one is UTF-16 + + int sizeIncr = isU ? 2 : 1; + + assert((!sizeKnown) || (((size > 0) && (size <= NUMBER_OF_CASES))), "Incorrect size given"); + + // Address of the kth byte of the needle within the haystack + Address kThByte = sizeKnown ? Address(haystack, size - sizeIncr) + : Address(haystack, needleLen, Address::times_1, -(sizeIncr)); + size = sizeKnown ? size : NUMBER_OF_CASES + 1; + + // Creates a mask of (n - k + 1) ones. This prevents + // recognizing any false-positives past the end of + // the valid haystack. + __ movq(rTmp, -1); + __ movq(eq_mask, nMinusK); + __ addq(eq_mask, 1); + __ bzhiq(rTmp, rTmp, eq_mask); + + // Compare first byte of needle to haystack + vpcmpeq(cmp_0, XMM_BYTE_0, Address(haystack, 0), Assembler::AVX_256bit, ae, _masm); + if (size != sizeIncr) { + // Compare last byte of needle to haystack at proper position + vpcmpeq(cmp_k, XMM_BYTE_K, kThByte, Assembler::AVX_256bit, ae, _masm); + + __ vpand(result, cmp_k, cmp_0, Assembler::AVX_256bit); + + if (size > (sizeIncr * 2)) { + vpcmpeq(cmp_k, XMM_BYTE_1, Address(haystack, 1 * sizeIncr), Assembler::AVX_256bit, ae, _masm); + __ vpand(result, cmp_k, result, Assembler::AVX_256bit); + } + } + + __ vpmovmskb(eq_mask, result, Assembler::AVX_256bit); + __ andl(eq_mask, rTmp); + + __ testl(eq_mask, eq_mask); + __ je(noMatch); + // At this point, we have at least one "match" where first and last bytes + // of the needle are found the correct distance apart. +} + +//////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////// +// Helper for big haystack loop construct +// +// For UTF-16 encoded needles, broadcast a word at the proper offset to the ymm +// register (case UU) +// For the UTF-16 encoded haystack with Latin1 encoded needle (case UL) we have +// to read into a temp register to zero-extend the single byte needle value, then +// broadcast words to the ymm register. +// +// Parameters: +// sizeKnown - if true, size is valid and needleLen invalid. +// size - the size of the needle. Pass 0 if unknown at compile time +// noMatch - label bound outside to jump to if there is no match +// loop_top - label bound inside this helper that should be branched to +// for additional comparisons. +// eq_mask - The bit mask returned that holds the result of the comparison +// hsPtrRet - This will hold the place within the needle where a match is found +// This is modified +// needleLen - The length of the needle +// needle - Address of the needle +// haystack - Address of the haystack +// hsLength - The length of the haystack +// rTmp1 - Temporary +// rTmp2 - Temporary +// rTmp3 - Temporary +// rTmp4 - Temporary +// ae - Argument encoding +// _masm - Current MacroAssembler instance pointer +// +// On entry: +// +// rbx: haystack +// rcx: k +// rdx: junk +// rsi: n +// rdi: haystack +// r10: n - k +// r12: k +// r13: junk +// r14: needle +// rbp: junk +// XMM_BYTE_0 - first element of needle broadcast +// XMM_BYTE_K - last element of needle broadcast + +static void big_case_loop_helper(bool sizeKnown, int size, Label &noMatch, Label &loop_top, + Register eq_mask, Register hsPtrRet, Register needleLen, + Register needle, Register haystack, Register hsLength, + Register rTmp1, Register rTmp2, Register rTmp3, Register rTmp4, + StrIntrinsicNode::ArgEncoding ae, MacroAssembler *_masm) { + Label L_midLoop, L_greaterThan32, L_out; + + assert_different_registers(eq_mask, hsPtrRet, needleLen, rdi, r15, rdx, rsi, rbx, r14, nMinusK); + + const Register last = rTmp1; + const Register temp1 = rTmp2; + const Register temp2 = rTmp3; + const Register temp3 = rTmp4; + + bool isUL = (ae == StrIntrinsicNode::UL); + bool isUU = (ae == StrIntrinsicNode::UU); + bool isU = isUL || isUU; // At least one is UTF-16 + + // Assume failure + __ movq(r11, -1); + + broadcast_additional_needles(sizeKnown, size, needle, needleLen, temp1, ae, _masm); + + __ cmpq(nMinusK, 31); + __ jae_b(L_greaterThan32); + + // Here the needle is too long, so we can't do a 32-byte read to compare the last element. + // + // Instead we match the first two characters, read from the end of the haystack + // back 32 characters, shift the result, compare and check that way. + // + // Set last to hsPtrRet so the next attempt at loop iteration ends the compare. + __ movq(last, haystack); + __ movq(hsPtrRet, haystack); + + // Compare first element of needle to haystack + vpcmpeq(XMM_TMP3, XMM_BYTE_0, Address(haystack, 0), Assembler::AVX_256bit, ae, _masm); + + __ vpmovmskb(eq_mask, XMM_TMP3, Assembler::AVX_256bit); + + if (!sizeKnown || (sizeKnown && (size > (isU ? 4 : 2)))) { + // Compare second element of needle to haystack and mask result + vpcmpeq(XMM_TMP3, XMM_BYTE_1, Address(haystack, isU ? 2 : 1), Assembler::AVX_256bit, ae, _masm); + + __ vpmovmskb(temp1, XMM_TMP3, Assembler::AVX_256bit); + __ andq(eq_mask, temp1); + } + + // Compare last element of needle to haystack, shift and mask result + vpcmpeq(XMM_TMP3, XMM_BYTE_K, Address(haystack, hsLength, Address::times_1, -32), + Assembler::AVX_256bit, ae, _masm); + + __ vpmovmskb(temp1, XMM_TMP3, Assembler::AVX_256bit); + + // Compute the proper shift value. If we let k be the needle length and n be the haystack + // length, we should be comparing to haystack[k - 1] through haystack[k - 1 + 31]. Since + // (n - k) < 32, (k - 1 + 31) would be past the end of the haystack. So the shift value + // is computed as (k + 31 - n). + // + // Clarification: The BYTE_K compare above compares haystack[(n-32):(n-1)]. We need to + // compare haystack[(k-1):(k-1+31)]. Subtracting either index gives shift value of + // (k + 31 - n): x = (k-1+31)-(n-1) = k-1+31-n+1 = k+31-n. + if (sizeKnown) { + __ movl(temp2, 31 + size); + } else { + __ movl(temp2, 31); + __ addl(temp2, needleLen); + } + __ subl(temp2, hsLength); + __ shrxl(temp1, temp1, temp2); + __ andl(eq_mask, temp1); + + __ testl(eq_mask, eq_mask); + __ je(noMatch); + + __ jmp(L_out); + + __ bind(L_greaterThan32); + + // Read 32-byte chunks at a time until the last 32-byte read would go + // past the end of the haystack. Then, set the final read to read exactly + // the number of bytes in the haystack. + // For example, if haystack length is 45 and needle length is 13, the compares + // will read the following bytes: + // + // First compare Last compare + // [ 0 : 31] [12 : 43] + // Next compare will go past end of haystack ([32:63]) + // Adjust so final read is: + // [ 1 : 32] [13 : 44] + + __ movq(hsPtrRet, haystack); + __ leaq(last, Address(haystack, nMinusK, Address::times_1, isU ? -30 : -31)); + __ jmpb(L_midLoop); + + __ align(OptoLoopAlignment); + __ bind(loop_top); + // An equal comparison indicates completion with no match + __ cmpq(hsPtrRet, last); + __ je(noMatch); + __ addq(hsPtrRet, 32); + + // If next compare will go beyond end of haystack adjust start of read + // back to last valid read position + __ cmpq(hsPtrRet, last); + __ jbe_b(L_midLoop); + __ movq(hsPtrRet, last); + + __ bind(L_midLoop); + + // compare_big_haystack_to_needle will jump to loop_top until a match has been + // found + compare_big_haystack_to_needle(sizeKnown, size, loop_top, hsPtrRet, needleLen, eq_mask, XMM_TMP1, + XMM_TMP2, XMM_TMP3, ae, _masm); + + // At this point, we have at least one "match" where first and last bytes + // of the needle are found the correct distance apart. + // + // NOTE: haystack (rbx) should be preserved; hsPtrRet(rcx) is expected to + // point to the haystack such that hsPtrRet[tzcntl(eq_mask)] points to + // the matched string. + + __ bind(L_out); +} + +//////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////// +// Helper for comparing small needles to the haystack after a potential match found. +// +// Parameters: +// size - The size of the needle in bytes +// L_noMatch - Label to jump to if needle does not match haystack at this location +// L_matchFound - Label to jump to if needle matches haystack at this location +// needle - the address of the first byte of the needle +// needleVal - The bytes of the needle to compare +// haystack - The address of the first byte of the haystack +// mask - The comparison mask from comparing the first 2 and last elements of the needle +// foundIndex - The index within the haystack of the match +// tmp - A temporary register +// ae - the argument encodings +// _masm - Current MacroAssembler instance pointer +// +// Branches to either L_noMatch or L_matchFound depending on the result of the comparison +// foundIndex will contain the index within the haystack of the match for L_matchFound + +static void byte_compare_helper(int size, Label &L_noMatch, Label &L_matchFound, Register needle, + Register needleVal, Register haystack, Register mask, + Register foundIndex, Register tmp, StrIntrinsicNode::ArgEncoding ae, + MacroAssembler *_masm) { + // Compare size bytes of needle to haystack + // + // At a minimum, the first, second and last bytes of needle already compare equal + // to the haystack, so there is no need to compare them again. + + Label L_loopTop; + + assert_different_registers(needle, needleVal, haystack, mask, foundIndex, tmp); + + bool isUL = (ae == StrIntrinsicNode::UL); + bool isUU = (ae == StrIntrinsicNode::UU); + bool isU = isUL || isUU; // At least one is UTF-16 + + int bytesAlreadyCompared = 0; + int bytesLeftToCompare = 0; + int offsetOfFirstByteToCompare = 0; + + Label temp; + + // Getting her we already have the first two and last elements of the needle + // comparing equal, so no need to compare them again + bytesAlreadyCompared = isU ? 6 : 3; + offsetOfFirstByteToCompare = isU ? 4 : 2; + + bytesLeftToCompare = size - bytesAlreadyCompared; + assert(bytesLeftToCompare <= 7, "Too many bytes left to compare"); + + // The needle is <= 3 elements long, so the ultimate result comes from the mask + if (bytesLeftToCompare <= 0) { + __ tzcntl(foundIndex, mask); + __ jmp(L_matchFound); + return; + } + + // At this point, there is at least one byte of the needle that needs to be + // compared to the haystack. + + // Pre-load the needle bytes to compare here + switch (bytesLeftToCompare) { + case 1: + case 2: + // Load for needle size of 4 and 5 bytes + __ movl(needleVal, Address(needle, (offsetOfFirstByteToCompare - 2))); + break; + + case 3: + case 4: + // Load for needle size of 6 and 7 bytes + __ movl(needleVal, Address(needle, offsetOfFirstByteToCompare)); + break; + + case 5: + case 6: + // Load for needle size of 8 and 9 bytes + __ movq(needleVal, Address(needle, (offsetOfFirstByteToCompare - 2))); + break; + + case 7: + // Load for needle size of 10 bytes + __ movq(needleVal, Address(needle, offsetOfFirstByteToCompare)); + break; + + default: + break; + } + + __ align(OptoLoopAlignment); + __ bind(L_loopTop); + __ tzcntl(foundIndex, mask); // Index of match within haystack + + switch (bytesLeftToCompare) { + case 1: + case 2: + // Comparison for needle size of 4 and 5 bytes + __ cmpl(Address(haystack, foundIndex, Address::times_1, offsetOfFirstByteToCompare - 2), + needleVal); + __ je(L_matchFound); + break; + + case 3: + case 4: + // Comparison for needle size of 6 and 7 bytes + __ cmpl(Address(haystack, foundIndex, Address::times_1, offsetOfFirstByteToCompare), needleVal); + __ je(L_matchFound); + break; + + case 5: + case 6: + // Comparison for needle size of 8 and 9 bytes + __ cmpq(Address(haystack, foundIndex, Address::times_1, offsetOfFirstByteToCompare - 2), + needleVal); + __ je(L_matchFound); + break; + + case 7: + // Comparison for needle size of 10 bytes + __ cmpq(Address(haystack, foundIndex, Address::times_1, offsetOfFirstByteToCompare), needleVal); + __ je(L_matchFound); + break; + + default: + break; + } + + CLEAR_BIT(mask); // Loop as long as there are other bits set + __ jne(L_loopTop); + __ jmp(L_noMatch); +} + +// highly_optimized_short_cases +// We can handle the cases where haystack size is <= 32 bytes and needle size <= 6 bytes +// as a special case. We first copy the haystack tpo the stack to avoid page faults. A mask is +// generated with (n - k + 1) bits set that ensures matches past the end of the original +// haystack do not get considered during compares. In this equation, n is length of haystack +// and k is length of needle. +// +// A vector compare for the first needle byte is done against the haystack and anded with the mask. +// For needle size == 1, if there's a match we found it, otherwise failure. The 2nd position +// of the needle is compared starting from the 2nd position of the haystack and anded with the +// mask. If needle size == 2 and a match is found, success else failure. This continues for +// all needle sizes up to 6 bytes. +// +// ae - Argument encoding +// haystack - The address of the haystack +// haystack_len - the length of the haystack in elements +// needle - The address of the needle +// needle_len - the length of the needle in elements +// XMM0 - Temporary xmm register +// XMM1 - Temporary xmm register +// mask - Used to hold comparison mask +// tmp - Temporary register +// _masm - Current MacroAssembler instance pointer +static void highly_optimized_short_cases(StrIntrinsicNode::ArgEncoding ae, Register haystack, + Register haystack_len, Register needle, + Register needle_len, XMMRegister XMM0, XMMRegister XMM1, + Register mask, Register tmp, MacroAssembler *_masm) { + // Highly optimized special-cases + Label L_noMatch, L_foundall, L_out; + + bool isUL = (ae == StrIntrinsicNode::UL); + bool isUU = (ae == StrIntrinsicNode::UU); + bool isU = isUL || isUU; // At least one is UTF-16 + + // Only optimize when haystack can fit on stack with room + // left over for page fault prevention + assert((COPIED_HAYSTACK_STACK_OFFSET == 0), "Must be zero!"); + assert((COPIED_HAYSTACK_STACK_SIZE == 64), "Must be 64!"); + + // Copy incoming haystack onto stack + { + Label L_adjustHaystack, L_moreThan16; + + // Copy haystack to stack (haystack <= 32 bytes) + __ subptr(rsp, COPIED_HAYSTACK_STACK_SIZE); + __ cmpq(haystack_len, isU ? 0x8 : 0x10); + __ ja_b(L_moreThan16); + + __ movq(tmp, COPIED_HAYSTACK_STACK_OFFSET + 0x10); + __ movdqu(XMM0, Address(haystack, haystack_len, isU ? Address::times_2 : Address::times_1, -0x10)); + __ movdqu(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET), XMM0); + __ jmpb(L_adjustHaystack); + + __ bind(L_moreThan16); + __ movq(tmp, COPIED_HAYSTACK_STACK_OFFSET + 0x20); + __ vmovdqu(XMM0, Address(haystack, haystack_len, isU ? Address::times_2 : Address::times_1, -0x20)); + __ vmovdqu(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET), XMM0); + + __ bind(L_adjustHaystack); + __ subptr(tmp, haystack_len); + + if (isU) { + // For UTF-16, lengths are half + __ subptr(tmp, haystack_len); + } + // Point the haystack to the stack + __ leaq(haystack, Address(rsp, tmp, Address::times_1)); + } + + // Creates a mask of (n - k + 1) ones. This prevents recognizing any false-positives + // past the end of the valid haystack. + __ movq(mask, -1); + __ subq(haystack_len, needle_len); + __ incrementq(haystack_len); + if (isU) { + __ shlq(haystack_len, 1); + } + __ bzhiq(mask, mask, haystack_len); + + // Loop for each needle size from 1 to 6 bytes long. For UU, only 3 elements. + for (int size = 1; size <= (isUU ? 3 : 6); size++) { + // Broadcast next needle byte into ymm register + int needle_position = isUU ? (size - 1) * 2 : size - 1; + int haystack_position = isU ? (size - 1) * 2 : size - 1; + if (isUU) { + __ vpbroadcastw(XMM0, Address(needle, needle_position), Assembler::AVX_256bit); + } else if (isUL) { + // Expand needle + __ movzbl(tmp, Address(needle, needle_position)); + __ movdl(XMM0, tmp); + // Byte of needle to words + __ vpbroadcastw(XMM0, XMM0, Assembler::AVX_256bit); + } else { + __ vpbroadcastb(XMM0, Address(needle, needle_position), Assembler::AVX_256bit); + } + + // Compare next byte. Keep the comparison mask in mask, which will + // accumulate + vpcmpeq(XMM1, XMM0, Address(haystack, haystack_position), Assembler::AVX_256bit, ae, _masm); + __ vpmovmskb(tmp, XMM1, Assembler::AVX_256bit); + __ andq(mask, tmp); // Accumulate matched bytes + __ testl(mask, mask); + __ je(L_noMatch); + + if (size != (isUU ? 3 : 6)) { + // Found a match for this needle size + __ cmpq(needle_len, size); + __ je(L_foundall); + } + } + + __ bind(L_foundall); + __ tzcntl(rax, mask); + + if (isU) { + __ shrl(rax, 1); + } + + __ bind(L_out); + __ addptr(rsp, COPIED_HAYSTACK_STACK_SIZE); + __ vzeroupper(); + __ leave(); + __ ret(0); + + __ bind(L_noMatch); + __ movq(rax, -1); + __ jmpb(L_out); +} + +//////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////// +// +// Set up jump table entries for both small and large haystack switches. +// +// ae - Argument encoding +// L_error - Label to branch to if no match found +// L_checkRange - label to jump to when match found. Checks validity of returned index +// L_fixup - Jump to here for big cases. Return value is pointer to matching haystack byte +// *big_jump_table - Address of pointer to the first element of big jump table +// *small_jump_table - Address of pointer to the first element of small jump table +// _masm - Current MacroAssembler instance pointer + +static void setup_jump_tables(StrIntrinsicNode::ArgEncoding ae, Label &L_error, Label &L_checkRange, + Label &L_fixup, address *big_jump_table, address *small_jump_table, + MacroAssembler *_masm) { + bool isUL = (ae == StrIntrinsicNode::UL); + bool isUU = (ae == StrIntrinsicNode::UU); + bool isU = isUL || isUU; // At least one is UTF-16 + const XMMRegister byte_1 = XMM_BYTE_1; + + address big_hs_jmp_table[NUMBER_OF_CASES]; // Jump table for large haystacks + address small_hs_jmp_table[NUMBER_OF_CASES]; // Jump table for small haystacks + int jmp_ndx = 0; + + //////////////////////////////////////////////// + // On entry to each case, the register state is: + // + // rax = unused + // rbx = &haystack + // rcx = haystack length + // rdx = &needle + // rsi = haystack length + // rdi = &haystack + // rbp = unused + // r8 = unused + // r9 = unused + // r10 = hs_len - needle len + // r11 = unused + // r12 = needle length + // r13 = (needle length - 1) + // r14 = &needle + // r15 = unused + // XMM_BYTE_0 - first element of needle, broadcast + // XMM_BYTE_K - last element of needle, broadcast + + { + //////////////////////////////////////////////////////////////////////////////////////// + // + // Small haystack (<=32 bytes) switch + // + // Handle cases that were not handled in highly_optimized_short_cases, which will be + // haystack size <= 32 bytes with 6 < needle size < NUMBER_OF_CASES bytes. + + //////////////////////////////////////////////// + // The haystack is <= 32 bytes + // + // If a match is not found, branch to L_error (which will always + // return -1). + // + // If a match is found, jump to L_checkRange, which ensures the + // matched needle is not past the end of the haystack. + // + // The index where a match is found is returned in set_bit (r11). + + const Register haystack = rbx; + const Register needle = r14; + const Register needle_val = r8; + const Register set_bit = r11; + const Register eq_mask = rsi; + const Register rTmp = rax; + + for (int i = 6; i < NUMBER_OF_CASES; i++) { + small_hs_jmp_table[i] = __ pc(); + if (isU && ((i + 1) & 1)) { + continue; + } else { + broadcast_additional_needles(true, i + 1, needle, noreg, rTmp, ae, _masm); + + compare_haystack_to_needle(true, i + 1, L_error, haystack, eq_mask, noreg, rTmp, XMM_TMP1, + XMM_TMP2, ae, _masm); + + byte_compare_helper(i + 1, L_error, L_checkRange, needle, needle_val, haystack, eq_mask, + set_bit, rTmp, ae, _masm); + } + } + } + + //////////////////////////////////////////////////////////////////////////////////////// + // + // Large haystack (> 32 bytes) switch + + { + //////////////////////////////////////////////// + // The haystack is > 32 bytes + // + // The value returned on a match is in hs_ptr (rcx) which is the address + // of the first matching byte within the haystack. The L_fixup label + // takes hs_ptr (rcx), haystack (rbx), and set_bit (r8) to compute the + // index as: hs_ptr - haystack + r8. hs_ptr - haystack is the offset + // within the haystack of the 32-byte chunk wherein a match was found, + // and set_bit is the index within that 32-byte chunk of the matching string. + + const Register haystack = rbx; + const Register needle = r14; + const Register needle_len = r12; + const Register needle_val = r15; + const Register set_bit = r8; + const Register eq_mask = r9; + const Register hs_ptr = rcx; + const Register hsLength = rsi; + const Register rTmp1 = rdi; + const Register rTmp2 = r15; + const Register rTmp3 = rdx; + const Register rTmp4 = r13; + + for (int i = 0; i < NUMBER_OF_CASES; i++) { + big_hs_jmp_table[i] = __ pc(); + if (isU && ((i + 1) & 1)) { + continue; + } else { + Label L_loopTop; + + big_case_loop_helper(true, i + 1, L_error, L_loopTop, eq_mask, hs_ptr, needle_len, + needle, haystack, hsLength, rTmp1, rTmp2, rTmp3, rTmp4, ae, _masm); + byte_compare_helper(i + 1, L_loopTop, L_fixup, needle, needle_val, hs_ptr, eq_mask, set_bit, + rTmp4, ae, _masm); + } + } + } + //////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////// + // JUMP TABLES + __ align(8); + + *big_jump_table = __ pc(); + + for (jmp_ndx = 0; jmp_ndx < NUMBER_OF_CASES; jmp_ndx++) { + __ emit_address(big_hs_jmp_table[jmp_ndx]); + } + + *small_jump_table = __ pc(); + + for (jmp_ndx = 0; jmp_ndx < NUMBER_OF_CASES; jmp_ndx++) { + __ emit_address(small_hs_jmp_table[jmp_ndx]); + } +} + +#undef STACK_SPACE +#undef MAX_NEEDLE_LEN_TO_EXPAND +#undef CLEAR_BIT +#undef XMM_BYTE_0 +#undef XMM_BYTE_K +#undef XMM_BYTE_1 +#undef XMM_TMP1 +#undef XMM_TMP2 +#undef XMM_TMP3 +#undef XMM_TMP4 + +#undef __ diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp index 577c56cb7a2..c9c4b056eb5 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp @@ -4241,6 +4241,12 @@ void StubGenerator::generate_compiler_stubs() { generate_chacha_stubs(); +#ifdef COMPILER2 + if ((UseAVX == 2) && EnableX86ECoreOpts) { + generate_string_indexof(StubRoutines::_string_indexof_array); + } +#endif + if (UseAdler32Intrinsics) { StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32(); } diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp index 02435bd172c..374679750a4 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp @@ -575,6 +575,9 @@ class StubGenerator: public StubCodeGenerator { void generate_libm_stubs(); +#ifdef COMPILER2 + void generate_string_indexof(address *fnptrs); +#endif address generate_cont_thaw(const char* label, Continuation::thaw_kind kind); address generate_cont_thaw(); diff --git a/src/hotspot/cpu/x86/stubRoutines_x86.hpp b/src/hotspot/cpu/x86/stubRoutines_x86.hpp index cfb91c5c083..b5ed3719897 100644 --- a/src/hotspot/cpu/x86/stubRoutines_x86.hpp +++ b/src/hotspot/cpu/x86/stubRoutines_x86.hpp @@ -37,7 +37,7 @@ enum platform_dependent_constants { _continuation_stubs_code_size = 1000 LP64_ONLY(+1000), // AVX512 intrinsics add more code in 64-bit VM, // Windows have more code to save/restore registers - _compiler_stubs_code_size = 20000 LP64_ONLY(+39000) WINDOWS_ONLY(+2000), + _compiler_stubs_code_size = 20000 LP64_ONLY(+46000) WINDOWS_ONLY(+2000), _final_stubs_code_size = 10000 LP64_ONLY(+20000) WINDOWS_ONLY(+2000) ZGC_ONLY(+20000) }; diff --git a/src/hotspot/share/opto/escape.cpp b/src/hotspot/share/opto/escape.cpp index 5630ee0228d..8a80392d5c7 100644 --- a/src/hotspot/share/opto/escape.cpp +++ b/src/hotspot/share/opto/escape.cpp @@ -2197,6 +2197,7 @@ void ConnectionGraph::process_call_arguments(CallNode *call) { strcmp(call->as_CallLeaf()->_name, "bigIntegerRightShiftWorker") == 0 || strcmp(call->as_CallLeaf()->_name, "bigIntegerLeftShiftWorker") == 0 || strcmp(call->as_CallLeaf()->_name, "vectorizedMismatch") == 0 || + strcmp(call->as_CallLeaf()->_name, "stringIndexOf") == 0 || strcmp(call->as_CallLeaf()->_name, "arraysort_stub") == 0 || strcmp(call->as_CallLeaf()->_name, "array_partition_stub") == 0 || strcmp(call->as_CallLeaf()->_name, "get_class_id_intrinsic") == 0 || diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp index 33c2d7309d0..b3253a817a4 100644 --- a/src/hotspot/share/opto/library_call.cpp +++ b/src/hotspot/share/opto/library_call.cpp @@ -1205,6 +1205,9 @@ bool LibraryCallKit::inline_string_indexOf(StrIntrinsicNode::ArgEnc ae) { Node* tgt_start = array_element_address(tgt, intcon(0), T_BYTE); Node* tgt_count = load_array_length(tgt); + Node* result = nullptr; + bool call_opt_stub = (StubRoutines::_string_indexof_array[ae] != nullptr); + if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) { // Divide src size by 2 if String is UTF16 encoded src_count = _gvn.transform(new RShiftINode(src_count, intcon(1))); @@ -1214,7 +1217,16 @@ bool LibraryCallKit::inline_string_indexOf(StrIntrinsicNode::ArgEnc ae) { tgt_count = _gvn.transform(new RShiftINode(tgt_count, intcon(1))); } - Node* result = make_indexOf_node(src_start, src_count, tgt_start, tgt_count, result_rgn, result_phi, ae); + if (call_opt_stub) { + Node* call = make_runtime_call(RC_LEAF, OptoRuntime::string_IndexOf_Type(), + StubRoutines::_string_indexof_array[ae], + "stringIndexOf", TypePtr::BOTTOM, src_start, + src_count, tgt_start, tgt_count); + result = _gvn.transform(new ProjNode(call, TypeFunc::Parms)); + } else { + result = make_indexOf_node(src_start, src_count, tgt_start, tgt_count, + result_rgn, result_phi, ae); + } if (result != nullptr) { result_phi->init_req(3, result); result_rgn->init_req(3, control()); @@ -1226,7 +1238,7 @@ bool LibraryCallKit::inline_string_indexOf(StrIntrinsicNode::ArgEnc ae) { return true; } -//-----------------------------inline_string_indexOf----------------------- +//-----------------------------inline_string_indexOfI----------------------- bool LibraryCallKit::inline_string_indexOfI(StrIntrinsicNode::ArgEnc ae) { if (too_many_traps(Deoptimization::Reason_intrinsic)) { return false; @@ -1234,6 +1246,7 @@ bool LibraryCallKit::inline_string_indexOfI(StrIntrinsicNode::ArgEnc ae) { if (!Matcher::match_rule_supported(Op_StrIndexOf)) { return false; } + assert(callee()->signature()->size() == 5, "String.indexOf() has 5 arguments"); Node* src = argument(0); // byte[] Node* src_count = argument(1); // char count @@ -1259,8 +1272,21 @@ bool LibraryCallKit::inline_string_indexOfI(StrIntrinsicNode::ArgEnc ae) { RegionNode* region = new RegionNode(5); Node* phi = new PhiNode(region, TypeInt::INT); + Node* result = nullptr; - Node* result = make_indexOf_node(src_start, src_count, tgt_start, tgt_count, region, phi, ae); + bool call_opt_stub = (StubRoutines::_string_indexof_array[ae] != nullptr); + + if (call_opt_stub) { + assert(arrayOopDesc::base_offset_in_bytes(T_BYTE) >= 16, "Needed for indexOf"); + Node* call = make_runtime_call(RC_LEAF, OptoRuntime::string_IndexOf_Type(), + StubRoutines::_string_indexof_array[ae], + "stringIndexOf", TypePtr::BOTTOM, src_start, + src_count, tgt_start, tgt_count); + result = _gvn.transform(new ProjNode(call, TypeFunc::Parms)); + } else { + result = make_indexOf_node(src_start, src_count, tgt_start, tgt_count, + region, phi, ae); + } if (result != nullptr) { // The result is index relative to from_index if substring was found, -1 otherwise. // Generate code which will fold into cmove. diff --git a/src/hotspot/share/opto/runtime.cpp b/src/hotspot/share/opto/runtime.cpp index d8e5cdbab04..a6f9a7e5470 100644 --- a/src/hotspot/share/opto/runtime.cpp +++ b/src/hotspot/share/opto/runtime.cpp @@ -1357,6 +1357,27 @@ const TypeFunc* OptoRuntime::base64_encodeBlock_Type() { const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields); return TypeFunc::make(domain, range); } + +// String IndexOf function +const TypeFunc* OptoRuntime::string_IndexOf_Type() { + int argcnt = 4; + + const Type** fields = TypeTuple::fields(argcnt); + int argp = TypeFunc::Parms; + fields[argp++] = TypePtr::NOTNULL; // haystack array + fields[argp++] = TypeInt::INT; // haystack length + fields[argp++] = TypePtr::NOTNULL; // needle array + fields[argp++] = TypeInt::INT; // needle length + assert(argp == TypeFunc::Parms + argcnt, "correct decoding"); + const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields); + + // result type needed + fields = TypeTuple::fields(1); + fields[TypeFunc::Parms + 0] = TypeInt::INT; // Index of needle in haystack + const TypeTuple* range = TypeTuple::make(TypeFunc::Parms + 1, fields); + return TypeFunc::make(domain, range); +} + // Base64 decode function const TypeFunc* OptoRuntime::base64_decodeBlock_Type() { int argcnt = 7; diff --git a/src/hotspot/share/opto/runtime.hpp b/src/hotspot/share/opto/runtime.hpp index e4cbdf2f0d0..9ca8ac04943 100644 --- a/src/hotspot/share/opto/runtime.hpp +++ b/src/hotspot/share/opto/runtime.hpp @@ -297,6 +297,7 @@ private: static const TypeFunc* chacha20Block_Type(); static const TypeFunc* base64_encodeBlock_Type(); static const TypeFunc* base64_decodeBlock_Type(); + static const TypeFunc* string_IndexOf_Type(); static const TypeFunc* poly1305_processBlocks_Type(); static const TypeFunc* intpoly_montgomeryMult_P256_Type(); static const TypeFunc* intpoly_assign_Type(); diff --git a/src/hotspot/share/runtime/stubRoutines.cpp b/src/hotspot/share/runtime/stubRoutines.cpp index 74286a4ac98..773f8031e15 100644 --- a/src/hotspot/share/runtime/stubRoutines.cpp +++ b/src/hotspot/share/runtime/stubRoutines.cpp @@ -149,6 +149,8 @@ address StubRoutines::_sha3_implCompressMB = nullptr; address StubRoutines::_updateBytesCRC32 = nullptr; address StubRoutines::_crc_table_adr = nullptr; +address StubRoutines::_string_indexof_array[4] = { nullptr }; + address StubRoutines::_crc32c_table_addr = nullptr; address StubRoutines::_updateBytesCRC32C = nullptr; address StubRoutines::_updateBytesAdler32 = nullptr; diff --git a/src/hotspot/share/runtime/stubRoutines.hpp b/src/hotspot/share/runtime/stubRoutines.hpp index 65b0c0d2f26..762a6edf590 100644 --- a/src/hotspot/share/runtime/stubRoutines.hpp +++ b/src/hotspot/share/runtime/stubRoutines.hpp @@ -232,6 +232,8 @@ class StubRoutines: AllStatic { static address _updateBytesCRC32; static address _crc_table_adr; + static address _string_indexof_array[4]; + static address _crc32c_table_addr; static address _updateBytesCRC32C; static address _updateBytesAdler32; diff --git a/test/jdk/TEST.ROOT b/test/jdk/TEST.ROOT index 25ef8250e4d..bfa99c72b66 100644 --- a/test/jdk/TEST.ROOT +++ b/test/jdk/TEST.ROOT @@ -11,7 +11,7 @@ # # A test flagged with cgroups uses cgroups. # -# Notes on "client" keywords : headful sound printer multimon +# Notes on "client" keywords : headful sound printer multimon # =========================================================== # # These keywords are there to help with test selection so that @@ -31,7 +31,7 @@ # Tests may not fail if there is none, instead just silently return. # But they also may legitimately throw an Exception depending on the test. # Also printer tests are not necessarily headful, but some are, and some are automated. -# +# # "sound". Similarly, not all sound tests require audio devices, but many do. # A test flagged with key "sound" needs audio devices on the system. # Also they are not necessarily "headful", since they don't require a display etc. @@ -99,6 +99,7 @@ requires.properties= \ vm.jvmci \ vm.jvmci.enabled \ vm.jvmti \ + vm.cpu.features \ docker.support \ release.implementor \ jdk.containerized \ diff --git a/test/jdk/java/lang/String/IndexOf.java b/test/jdk/java/lang/String/IndexOf.java new file mode 100644 index 00000000000..baab83e19c4 --- /dev/null +++ b/test/jdk/java/lang/String/IndexOf.java @@ -0,0 +1,258 @@ +/* + * Copyright (c) 2024, Intel Corporation. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8320448 + * @summary test String indexOf() intrinsic + * @run driver IndexOf + */ + +/* + * @test + * @bug 8320448 + * @summary test String indexOf() intrinsic + * @requires vm.cpu.features ~= ".*avx2.*" + * @requires vm.compiler2.enabled + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -Xcomp -XX:-TieredCompilation -XX:UseAVX=2 -XX:+UnlockDiagnosticVMOptions -XX:+EnableX86ECoreOpts IndexOf + */ + + public class IndexOf { + final int scope = 32*2+16+8; + final char a, aa, b, c, d; + enum Encoding {LL, UU, UL; } + final Encoding ae; + int failures; + + IndexOf(Encoding _ae) { + failures = 0; + ae = _ae; + switch (ae) { + case LL: + a = 'a'; + aa = a; + b = 'b'; + c = 'c'; + d = 'd'; + break; + case UU: + a = '\u0061'; + aa = a; + b = '\u0062'; + c = '\u1063'; + d = '\u0064'; + break; + default: //case UL: + a = 'a'; + aa = '\u1061'; + b = 'b'; + c = 'c'; + d = 'd'; + break; + } + } + + // needle =~ /ab*d/ + // badNeedle =~ /ab*db*d/ + interface Append {void append(int pos, char cc);} + String newNeedle(int size, int badPosition) { + if (size<2) {throw new RuntimeException("Fix testcase "+size);} + + StringBuilder needle = new StringBuilder(size); + Append n = (int pos, char cc) -> { + if (pos == badPosition) + needle.append(c); + else + needle.append(cc); + }; + + n.append(0, a); + for (int i=1; isize) {throw new RuntimeException("Fix testcase "+nPosition+" "+needle.length()+" "+size);} + StringBuilder haystack = new StringBuilder(size); + int i = 0; + for (; isize) {throw new RuntimeException("Fix testcase "+nPosition+" "+needle.length()+" "+size);} + StringBuilder haystack = new StringBuilder(size); + int i = 0; + for (; ihaystack_len) return -1; + if (-1 != "Hello".indexOf("HelloWorld", 3)) { + System.out.println("FAILED: if (needle_len>haystack_len) return -1"); + failures++; + } + return this; + } + + IndexOf test1() { // Test expected to find one needle + for (int nSize = 2; nSize titles = new HashMap(); + static Random rng = new Random(1999); + + public static void main(String[] args) throws Exception { + int foo = 0; + String testName = "ECoreIndexOf"; + + generator = new Random(); + long seed = generator.nextLong(); + generator.setSeed(seed); + System.out.println("Seed set to "+ seed); + + /////////////////////////// WARM-UP ////////////////////////// + + for (int i = 0; i < 20000; i++) { + char c = 65; + char c16 = 0x1ed; + StringBuffer sb = new StringBuffer("a"); + StringBuffer sb16 = new StringBuffer("\u01fe"); + + foo += indexOfKernel("\u01fe", "a"); + foo += indexOfKernel("\u01fe", "a", 0); + foo += indexOfKernel("\u01fe", "\u01ff"); + foo += indexOfKernel("\u01fe", "\u01ff", 0); + foo += indexOfKernel("a", "a"); + foo += indexOfKernel("a", "a", 0); + foo += indexOfKernel("a", "\u01ff"); + foo += indexOfKernel("a", "\u01ff", 0); + + foo += indexOfKernel("\u01fe", c); + foo += indexOfKernel("\u01fe", c, 0); + foo += indexOfKernel("\u01fe", c16); + foo += indexOfKernel("\u01fe", c16, 0); + foo += indexOfKernel("a", c); + foo += indexOfKernel("a", c, 0); + foo += indexOfKernel("a", c16); + foo += indexOfKernel("a", c16, 0); + + foo += indexOfKernel(sb16, c); + foo += indexOfKernel(sb16, c, 0); + foo += indexOfKernel(sb16, c16); + foo += indexOfKernel(sb16, c16, 0); + foo += indexOfKernel(sb, c); + foo += indexOfKernel(sb, c, 0); + foo += indexOfKernel(sb, c16); + foo += indexOfKernel(sb, c16, 0); + } + + /////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////// + + String[] decorators = {"", " (same char)"}; + Charset[] charSets = {StandardCharsets.ISO_8859_1, StandardCharsets.UTF_16}; + boolean[] truefalse = {true, false}; + + titles.put(StandardCharsets.ISO_8859_1, "L"); + titles.put(StandardCharsets.UTF_16, "U"); + + for (int xxy = 0; xxy < 2; xxy++) { // Run at least twice to ensure stub called + + for (int i = 0; i < 128; i++) { + haystack[i] = (char) i; + } + + haystack_16[0] = '\u0000'; // (char) (23 + 256); + for (int i = 1; i < 128; i++) { + haystack_16[i] = (char) (i); + } + + simpleTest(); + compareIndexOfLastIndexOf(); + compareStringStringBuffer(); + StringIndexof(); + StringIndexofChar(); + StringIndexofHuge(); + + for (String decorator : decorators) { + for (Charset csHaystack : charSets) { + for (Charset csNeedle : charSets) { + System.out.println("Testing " + titles.get(csHaystack) + titles.get(csNeedle) + decorator); + for (boolean useOffset : truefalse) { + for (boolean useBuffer : truefalse) { + exhaustive(useOffset, useBuffer, csHaystack, csNeedle); + } + } + } + } + + for (int i = 0; i < 128; i++) { + haystack[i] = (char) 'a'; + } + + for (int i = 0; i < 128; i++) { + haystack_16[i] = (char) ('a' + 256); + } + } + } + + System.out.println(testName + " complete."); + + if (failure) + throw new RuntimeException("One or more failures."); + } + + private static void report(String testName, int failCount) { + System.err.println(testName + ": " + + (failCount == 0 ? "Passed" : "Failed(" + failCount + ")")); + if (failCount > 0) + failure = true; + } + + private static String generateTestString(int min, int max) { + StringBuffer aNewString = new StringBuffer(120); + int aNewLength = getRandomIndex(min, max); + for (int y = 0; y < aNewLength; y++) { + int achar = generator.nextInt(30) + 30; + char test = (char) (achar); + aNewString.append(test); + } + return aNewString.toString(); + } + + private static String makeRndString(boolean isUtf16, int length) { + StringBuilder sb = new StringBuilder(length); + if (length > 0) { + sb.append(isUtf16 ? '\u2026' : 'b'); // ... + + for (int i = 1; i < length - 1; i++) { + sb.append((char) ('b' + rng.nextInt(26))); + } + + sb.append(rng.nextInt(3) >= 1 ? 'a' : 'b');// 66.6% of time 'a' is in string + } + return sb.toString(); + } + + private static int indexOfKernel(String haystack, String needle) { + return haystack.indexOf(needle); + } + + private static int indexOfKernel(String haystack, String needle, int offset) { + return haystack.indexOf(needle, offset); + } + + private static int indexOfKernel(StringBuffer haystack, String needle) { + return haystack.indexOf(needle); + } + + private static int indexOfKernel(StringBuffer haystack, char cneedle) { + String needle = String.valueOf(cneedle); + return haystack.indexOf(needle); + } + + private static int indexOfKernel(StringBuffer haystack, String needle, int offset) { + return haystack.indexOf(needle, offset); + } + + private static int indexOfKernel(StringBuffer haystack, char cneedle, int offset) { + String needle = String.valueOf(cneedle); + return haystack.indexOf(needle, offset); + } + + private static int indexOfKernel(String haystack, char needle) { + return haystack.indexOf(needle); + } + + private static int indexOfKernel(String haystack, char needle, int offset) { + return haystack.indexOf(needle, offset); + } + + private static void printStringBytes(byte[] bytes) { + System.err.println(" bytes.len=" + bytes.length); + for (byte b : bytes) { + System.err.print(String.format("0x%02x ", b)); + } + System.err.println(""); + } + + private static int getRandomIndex(int constraint1, int constraint2) { + int range = constraint2 - constraint1; + int x = generator.nextInt(range); + return constraint1 + x; + } + + private static int naiveFind(String haystack, String needle) { + return naiveFind(haystack, needle, 0); + } + + private static int naiveFind(String haystack, char needle) { + return naiveFind(haystack, needle, 0); + } + + private static int naiveFind(String haystack, String needle, int offset) { + int x = offset; + int len = haystack.length() - offset; + if (needle.length() == 0) + return offset; + if (needle.length() > len) + return -1; + int hsndx = 0; + int nndx = 0; + for (int xx = 0; xx < offset; xx++) { + hsndx += Character.charCount(haystack.codePointAt(hsndx)); + } + + for (x = offset; x < haystack.length() - needle.length() + 1; x++) { + if (haystack.codePointAt(hsndx) == needle.codePointAt(0)) { + nndx = Character.charCount(needle.codePointAt(0)); + int hsndx_tmp = hsndx + Character.charCount(haystack.codePointAt(hsndx)); + + while (nndx < needle.length()) { + if (haystack.codePointAt(hsndx_tmp) != needle.codePointAt(nndx)) { + break; + } + hsndx_tmp += Character.charCount(haystack.codePointAt(hsndx_tmp)); + nndx += Character.charCount(needle.codePointAt(nndx)); + } + if (nndx == needle.length()) { + return x; + } + } + hsndx += Character.charCount(haystack.codePointAt(hsndx)); + } + return -1; + } + + private static int naiveFind(String haystack, char cneedle, int offset) { + int x = offset; + int len = haystack.length() - offset; + String needle = String.valueOf(cneedle); + if (len == 0) + return -1; + int hsndx = 0; + for (int xx = 0; xx < offset; xx++) { + hsndx += Character.charCount(haystack.codePointAt(hsndx)); + } + + for (x = offset; x < haystack.length(); x++) { + if (haystack.codePointAt(hsndx) == needle.codePointAt(0)) { + return x; + } + hsndx += Character.charCount(haystack.codePointAt(hsndx)); + } + + return -1; + } + + private static void exhaustive(boolean useOffset, boolean useStringBuffer, Charset hs_charset, + Charset needleCharset) { + int result = 0; + int midresult = 0; + int endresult = 0; + int l_offset = 0; + int failCount = 0; + + String thisTest = titles.get(hs_charset) + titles.get(needleCharset) + (useOffset ? " w/offset" : "") + (useStringBuffer ? " StringBuffer" : ""); + + for (int needleSize = 0; needleSize < 128; needleSize++) { + for (int haystackSize = 0; haystackSize < 128; haystackSize++) { + for (l_offset = 0; l_offset <= haystackSize; l_offset++) { + String needle = new String(Arrays.copyOfRange( + (needleCharset == StandardCharsets.UTF_16) ? haystack_16 : haystack, l_offset, l_offset + needleSize)); + int hsSize = (haystackSize - l_offset) >= 0 ? haystackSize - l_offset : 0; + int midStart = Math.max((hsSize / 2) - (needleSize / 2), 0); + int endStart = (hsSize > needleSize) ? hsSize - needleSize : 0; + String midNeedle = new String( + Arrays.copyOfRange((needleCharset == StandardCharsets.UTF_16) ? haystack_16 : haystack, + midStart + l_offset, midStart + needleSize + l_offset)); + String endNeedle = new String( + Arrays.copyOfRange((needleCharset == StandardCharsets.UTF_16) ? haystack_16 : haystack, + endStart + l_offset, endStart + needleSize + l_offset)); + String shs = new String( + Arrays.copyOfRange((hs_charset == StandardCharsets.UTF_16) ? haystack_16 : haystack, 0, haystackSize)); + + // Truncate needles to correct lengths + + if (l_offset + needleSize > haystack.length + 1) { + needle = needle.substring(0, needleSize); + midNeedle = midNeedle.substring(0, needleSize); + endNeedle = endNeedle.substring(0, needleSize); + } + + if (!success && needleSize > 1) { + needle = needle.substring(0, needle.length() - 1) + (char) ((int) (needle.charAt(needle.length() - 2) + 1)); + midNeedle = midNeedle.substring(0, midNeedle.length() - 1) + + (char) ((int) (midNeedle.charAt(midNeedle.length() - 2) + 1)); + endNeedle = endNeedle.substring(0, endNeedle.length() - 1) + + (char) ((int) (endNeedle.charAt(endNeedle.length() - 2) + 1)); + } + + StringBuffer hs = new StringBuffer(shs.length()); + hs.append(shs); + if (!shs.equals(hs.toString())) + throw new RuntimeException("Initial equality failure"); + + if (useStringBuffer) { + result = indexOfKernel(hs, needle, l_offset); + midresult = indexOfKernel(hs, midNeedle, l_offset); + endresult = indexOfKernel(hs, endNeedle, l_offset); + } else { + result = indexOfKernel(shs, needle, l_offset); + midresult = indexOfKernel(shs, midNeedle, l_offset); + endresult = indexOfKernel(shs, endNeedle, l_offset); + } + int nResult = naiveFind(hs.toString(), needle, l_offset); + int midnResult = naiveFind(hs.toString(), midNeedle, l_offset); + int endnResult = naiveFind(hs.toString(), endNeedle, l_offset); + if (result != nResult) { + failCount++; + System.err.println("useOffset=" + useOffset + ", useStringBuffer=" + useStringBuffer); + System.err.print("Haystack="); + printStringBytes(shs.getBytes(hs_charset)); + System.err.print("Needle="); + printStringBytes(needle.getBytes(needleCharset)); + System.err.println("l_offset=" + l_offset); + System.err.println("haystackLen=" + haystackSize + " needleLen=" + needleSize + + " result=" + result + " nResult=" + nResult); + System.err.println(""); + } + // badResults = success ? ((midnResult == -1) || (midresult == -1)) : + // ((midnResult != -1) || (midresult != -1)); + if ((midresult != midnResult)) { + failCount++; + System.err.println("useOffset=" + useOffset + ", useStringBuffer=" + useStringBuffer); + System.err.print("Haystack="); + printStringBytes(shs.getBytes(hs_charset)); + System.err.print("Needle="); + printStringBytes(midNeedle.getBytes(needleCharset)); + System.err.println("l_offset=" + l_offset); + System.err.println("haystackLen=" + haystackSize + " needleLen=" + needleSize + + " midresult=" + midresult + " midnResult=" + midnResult); + System.err.println(""); + } + // badResults = success ? ((endnResult == -1) || (endresult == -1)) : + // ((endnResult != -1) || (endresult != -1)); + if ((endresult != endnResult)) { + failCount++; + System.err.println("useOffset=" + useOffset + ", useStringBuffer=" + useStringBuffer); + System.err.print("Haystack="); + printStringBytes(shs.getBytes(hs_charset)); + System.err.print("Needle="); + printStringBytes(endNeedle.getBytes(needleCharset)); + System.err.println("l_offset=" + l_offset); + System.err.println("haystackLen=" + haystackSize + " needleLen=" + needleSize + + " endresult=" + endresult + " endnResult=" + endnResult); + System.err.println(""); + } + + if (!useOffset) + l_offset = haystackSize + 100; + } + } + } + + report("Exhaustive " + thisTest, failCount); + } + + private static void PrintError(int kernel, int naive, int num, String prefix, String hs, char needle) { + PrintError(kernel, naive, num, prefix, hs, String.valueOf(needle)); + } + + private static void PrintError(int kernel, int naive, int num, String prefix, String hs, String needle) { + if (!verbose) + return; + System.err.println(prefix + ": (" + num + "): kernel=" + kernel + ", naive=" + naive); + System.err.print("Haystack="); + printStringBytes(hs.getBytes()); + System.err.print("Needle="); + printStringBytes(needle.getBytes()); + System.err.println(""); + } + + private static void simpleTest() { + int failCount = 0; + String sourceString; + StringBuffer sourceBuffer; + String targetString; + String emptyString = ""; + String allAs = new String("aaaaaaaaaaaaaaaaaaaaaaaaa"); + StringBuffer allAsBuffer = new StringBuffer(allAs); + + for (int i = 0; i < 10000; i++) { + do { + sourceString = generateTestString(99, 100); + sourceBuffer = new StringBuffer(sourceString); + targetString = generateTestString(10, 11); + } while (indexOfKernel(sourceString, targetString) != -1); + + int index1 = generator.nextInt(90) + 5; + sourceBuffer = sourceBuffer.replace(index1, index1, targetString); + + if ((indexOfKernel(sourceBuffer, targetString) != index1) || + (index1 != naiveFind(sourceBuffer.toString(), targetString, 0))) { + System.err.println("sourceBuffer.indexOf(targetString) fragment '" + targetString + "' (" + + targetString.length() + ") String = " + + sourceBuffer.toString() + " len Buffer = " + sourceBuffer.toString().length()); + System.err.println(" naive = " + naiveFind(sourceBuffer.toString(), targetString, 0) + ", IndexOf = " + + indexOfKernel(sourceBuffer, targetString)); + failCount++; + } + if ((indexOfKernel(sourceBuffer, targetString, 5) != index1) || + (index1 != naiveFind(sourceBuffer.toString(), targetString, 0))) { + System.err.println("sourceBuffer.indexOf(targetString, 5) fragment '" + targetString + "' (" + + targetString.length() + ") String = " + + sourceBuffer.toString() + " len Buffer = " + sourceBuffer.toString().length()); + System.err.println(" naive = " + naiveFind(sourceBuffer.toString(), targetString, 0) + ", IndexOf = " + + indexOfKernel(sourceBuffer, targetString, 5)); + failCount++; + } + if ((indexOfKernel(sourceBuffer, targetString, 99) == index1) || + (index1 != naiveFind(sourceBuffer.toString(), targetString, 0))) { + System.err.println("sourceBuffer.indexOf(targetString, 99) fragment '" + targetString + "' (" + + targetString.length() + ") String = " + + sourceBuffer.toString() + " len Buffer = " + sourceBuffer.toString().length()); + System.err.println(" naive = " + naiveFind(sourceBuffer.toString(), targetString, 0) + ", IndexOf = " + + indexOfKernel(sourceBuffer, targetString, 99)); + failCount++; + } + if ((indexOfKernel(sourceBuffer, emptyString, 99) != 99) || + (99 != naiveFind(sourceBuffer.toString(), emptyString, 99))) { + System.err.println("sourceBuffer.indexOf(emptyString, 99) fragment '" + emptyString + "' (" + + emptyString.length() + ") String = " + + sourceBuffer.toString() + " len Buffer = " + sourceBuffer.toString().length()); + System.err.println(" naive = " + naiveFind(sourceBuffer.toString(), emptyString, 99) + ", IndexOf = " + + indexOfKernel(sourceBuffer, emptyString, 99)); + failCount++; + } + if ((indexOfKernel(allAsBuffer.substring(1, 3), allAsBuffer.substring(5, 12)) != -1) || + (-1 != naiveFind(allAsBuffer.substring(1, 3).toString(), allAsBuffer.substring(5, 12), 0))) { + System.err.println("allAsBuffer.substring(1, 3).indexOf(allAsBuffer.substring(5, 12)) fragment '" + + allAsBuffer.substring(5, 12) + "' (" + + allAsBuffer.substring(5, 12).length() + ") String = " + + allAsBuffer.substring(1, 3) + " len Buffer = " + allAsBuffer.substring(1, 3).length()); + System.err.println( + " naive = " + naiveFind(allAsBuffer.substring(1, 3).toString(), allAsBuffer.substring(5, 12), 0) + + ", IndexOf = " + indexOfKernel(allAsBuffer.substring(1, 3), allAsBuffer.substring(5, 12))); + failCount++; + } + } + + report("Basic Test ", failCount); + } + + // Note: it is possible although highly improbable that failCount will + // be > 0 even if everthing is working ok + private static void compareIndexOfLastIndexOf() { + int failCount = 0; + String sourceString; + StringBuffer sourceBuffer; + String targetString; + + for (int i = 0; i < 10000; i++) { + do { + sourceString = generateTestString(99, 100); + sourceBuffer = new StringBuffer(sourceString); + targetString = generateTestString(10, 11); + } while (indexOfKernel(sourceString, targetString) != -1); + + int index1 = generator.nextInt(100); + sourceBuffer = sourceBuffer.replace(index1, index1, targetString); + + // extremely remote possibility of > 1 match + int matches = 0; + int index2 = -1; + while ((index2 = indexOfKernel(sourceBuffer, targetString, index2 + 1)) != -1) + matches++; + if (matches > 1) + continue; + + if (indexOfKernel(sourceBuffer, targetString) != sourceBuffer.lastIndexOf(targetString)) + failCount++; + sourceString = sourceBuffer.toString(); + if (indexOfKernel(sourceString, targetString) != sourceString.lastIndexOf(targetString)) + failCount++; + } + + report("IndexOf vs LastIndexOf ", failCount); + } + + private static void compareStringStringBuffer() { + int failCount = 0; + boolean make_new = true; + + String fragment = null; + StringBuffer testBuffer = null; + String testString = null; + int testIndex = 0; + + failCount = indexOfKernel("", ""); + + for (int x = 0; x < 1000000; x++) { + if (make_new) { + testString = generateTestString(1, 100); + int len = testString.length(); + + testBuffer = new StringBuffer(len); + testBuffer.append(testString); + if (!testString.equals(testBuffer.toString())) + throw new RuntimeException("Initial equality failure"); + + int x1 = 0; + int x2 = 1000; + while (x2 > testString.length()) { + x1 = generator.nextInt(len); + x2 = generator.nextInt(100); + x2 = x1 + x2; + } + fragment = testString.substring(x1, x2); + } + + int sAnswer = indexOfKernel(testString, fragment); + int sbAnswer = indexOfKernel(testBuffer, fragment); + + if (sAnswer != sbAnswer) { + System.err.println("(1) IndexOf fragment '" + fragment + "' (" + fragment.length() + ") len String = " + + testString.length() + " len Buffer = " + testBuffer.length()); + System.err.println(" sAnswer = " + sAnswer + ", sbAnswer = " + sbAnswer); + System.err.println(" testString = '" + testString + "'"); + System.err.println(" testBuffer = '" + testBuffer + "'"); + failCount++; + + sAnswer = indexOfKernel(testString, fragment); + sbAnswer = indexOfKernel(testBuffer, fragment); + } else { + if (sAnswer > testString.length()) { + System.err.println( + "IndexOf returned value out of range; return: " + sAnswer + " length max: " + testBuffer.length()); + } + } + + if ((fragment == "0#:02/62;+-\"\"0$25-5$#)1263") && (testBuffer.length() == 94)) { + String xx = "abc"; + String yy = "abcdefg"; + int sA = indexOfKernel(xx, yy); + } + + if (make_new) + testIndex = getRandomIndex(-100, 100); + + sAnswer = indexOfKernel(testString, fragment, testIndex); + sbAnswer = indexOfKernel(testBuffer, fragment, testIndex); + + if (sAnswer != sbAnswer) { + System.err.println("(2) IndexOf fragment '" + fragment + "' (" + fragment.length() + ") index = " + testIndex + + " len String = " + testString.length() + " len Buffer = " + testBuffer.length()); + System.err.println(" sAnswer = " + sAnswer + ", sbAnswer = " + sbAnswer); + System.err.println(" testString = '" + testString + "'"); + System.err.println(" testBuffer = '" + testBuffer + "'"); + failCount++; + make_new = true; + + sAnswer = indexOfKernel(testString, fragment, testIndex); + sbAnswer = indexOfKernel(testBuffer, fragment, testIndex); + } else { + if ((sAnswer > testString.length()) || ((sAnswer != -1) && (sAnswer < testIndex) && (fragment.length() != 0))) { + System.err.println("IndexOf returned value out of range; return: " + sAnswer + " length max: " + + testString.length() + " index: " + testIndex); + System.err.println("(3) IndexOf fragment '" + fragment + "' (" + fragment.length() + ") index = " + testIndex + + " len String = " + testString.length() + " len Buffer = " + testBuffer.length()); + } + } + + sAnswer = testString.lastIndexOf(fragment); + sbAnswer = testBuffer.lastIndexOf(fragment); + + if (sAnswer != sbAnswer) { + System.err.println("(1) lastIndexOf fragment '" + fragment + "' len String = " + testString.length() + + " len Buffer = " + testBuffer.length()); + System.err.println(" sAnswer = " + sAnswer + ", sbAnswer = " + sbAnswer); + failCount++; + + sAnswer = testString.lastIndexOf(fragment); + sbAnswer = testBuffer.lastIndexOf(fragment); + } + + if (make_new) + testIndex = getRandomIndex(-100, 100); + + sAnswer = testString.lastIndexOf(fragment, testIndex); + sbAnswer = testBuffer.lastIndexOf(fragment, testIndex); + + if (sAnswer != sbAnswer) { + System.err.println("(2) lastIndexOf fragment '" + fragment + "' index = " + testIndex + " len String = " + + testString.length() + " len Buffer = " + testBuffer.length()); + failCount++; + } + } + + report("String vs StringBuffer ", failCount); + } + + ////////////////////////////////////////////////////////////////////// + // Test routines used in benchmarks + // + // From StringIndexofHuge + private static void StringIndexofHuge() { + int stubResult = 0; + int failCount = 0; + + for (int xx = 0; xx < 2; xx++) { + int num = 1; + + String dataString = "ngdflsoscargfdgf"; + String dataString16 = "ngdfilso\u01facargfd\u01eef"; + String dataStringHuge = (("A".repeat(32) + "B".repeat(32)).repeat(16) + "X").repeat(2) + "bB"; + String dataStringHuge16 = "\u01de" + (("A".repeat(32) + "B".repeat(32)).repeat(16) + "\u01fe").repeat(2) + + "\u01eeB"; + String earlyMatchString = dataStringHuge.substring(0, 34); + String earlyMatchString16 = dataStringHuge16.substring(0, 34); + String midMatchString = dataStringHuge.substring(dataStringHuge.length() / 2 - 16, + dataStringHuge.length() / 2 + 32); + String midMatchString16 = dataStringHuge16.substring(dataStringHuge16.length() / 2 - 16, + dataStringHuge16.length() / 2 + 32); + String lateMatchString = dataStringHuge.substring(dataStringHuge.length() - 31); + String lateMatchString16 = dataStringHuge16.substring(dataStringHuge16.length() - 31); + + String searchString = "oscar"; + String searchString16 = "o\u01facar"; + String searchStringSmall = "dgf"; + String searchStringSmall16 = "d\u01eef"; + + String searchStringHuge = "capaapapapasdkajdlkajskldjaslkajdlkajskldjaslkjdlkasjdsalk"; + String searchStringHuge16 = "capaapapapasdkajdlka\u01feskldjaslkajdlkajskldjaslkjdlkasjdsalk"; + + String searchNoMatch = "XYXyxYxy".repeat(22); + String searchNoMatch16 = "\u01ab\u01ba\u01cb\u01bc\u01de\u01ed\u01fa\u01af".repeat(22); + + stubResult = indexOfKernel(dataStringHuge16, earlyMatchString); + int nResult = naiveFind(dataStringHuge16, earlyMatchString); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", dataStringHuge16, earlyMatchString); + failCount++; + } + num++; + stubResult = indexOfKernel(dataStringHuge, earlyMatchString); + nResult = naiveFind(dataStringHuge, earlyMatchString); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", dataStringHuge, earlyMatchString); + failCount++; + } + num++; + stubResult = indexOfKernel(dataStringHuge, midMatchString); + nResult = naiveFind(dataStringHuge, midMatchString); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", dataStringHuge, midMatchString); + failCount++; + } + num++; + stubResult = indexOfKernel(dataStringHuge, lateMatchString); + nResult = naiveFind(dataStringHuge, lateMatchString); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", dataStringHuge, lateMatchString); + failCount++; + } + num++; + stubResult = indexOfKernel(dataStringHuge, searchNoMatch); + nResult = naiveFind(dataStringHuge, searchNoMatch); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", dataStringHuge, searchNoMatch); + failCount++; + } + num++; + stubResult = indexOfKernel(searchString, searchString); + nResult = naiveFind(searchString, searchString); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", searchString, searchString); + failCount++; + } + num++; + stubResult = indexOfKernel(dataString, searchString); + nResult = naiveFind(dataString, searchString); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", dataString, searchString); + failCount++; + } + num++; + stubResult = indexOfKernel(dataString, searchStringSmall); + nResult = naiveFind(dataString, searchStringSmall); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", dataString, searchStringSmall); + failCount++; + } + num++; + stubResult = indexOfKernel(dataStringHuge, "B".repeat(30) + "X" + "A".repeat(30), 74); + nResult = naiveFind(dataStringHuge, "B".repeat(30) + "X" + "A".repeat(30), 74); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", dataStringHuge, + "B".repeat(30) + "X" + "A".repeat(30)); + failCount++; + } + num++; + stubResult = indexOfKernel(dataStringHuge, "A".repeat(32) + "F" + "B".repeat(32), 64); + nResult = naiveFind(dataStringHuge, "A".repeat(32) + "F" + "B".repeat(32), 64); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", dataStringHuge, + "A".repeat(32) + "F" + "B".repeat(32)); + failCount++; + } + num++; + stubResult = indexOfKernel(midMatchString, dataStringHuge, 3); + nResult = naiveFind(midMatchString, dataStringHuge, 3); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", midMatchString, dataStringHuge); + failCount++; + } + num++; + stubResult = indexOfKernel(dataStringHuge, "A".repeat(32) + "B".repeat(30) + "bB"); + nResult = naiveFind(dataStringHuge, "A".repeat(32) + "B".repeat(30) + "bB"); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", dataStringHuge, + "A".repeat(32) + "B".repeat(30) + "bB"); + failCount++; + } + num++; + stubResult = indexOfKernel(dataStringHuge16, earlyMatchString); + nResult = naiveFind(dataStringHuge16, earlyMatchString); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", dataStringHuge16, earlyMatchString); + failCount++; + } + num++; + stubResult = indexOfKernel(dataStringHuge16, midMatchString); + nResult = naiveFind(dataStringHuge16, midMatchString); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", dataStringHuge16, midMatchString); + failCount++; + } + num++; + stubResult = indexOfKernel(dataStringHuge16, lateMatchString); + nResult = naiveFind(dataStringHuge16, lateMatchString); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", dataStringHuge16, lateMatchString); + failCount++; + } + num++; + stubResult = indexOfKernel(dataStringHuge16, searchNoMatch); + nResult = naiveFind(dataStringHuge16, searchNoMatch); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", dataStringHuge16, searchNoMatch); + failCount++; + } + num++; + stubResult = indexOfKernel(searchString16, searchString); + nResult = naiveFind(searchString16, searchString); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", searchString16, searchString); + failCount++; + } + num++; + stubResult = indexOfKernel(dataString16, searchString); + nResult = naiveFind(dataString16, searchString); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", dataString16, searchString); + failCount++; + } + num++; + stubResult = indexOfKernel(dataString16, searchStringSmall); + nResult = naiveFind(dataString16, searchStringSmall); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", dataString16, searchStringSmall); + failCount++; + } + num++; + stubResult = indexOfKernel(dataStringHuge16, "B".repeat(30) + "X" + "A".repeat(30), 74); + nResult = naiveFind(dataStringHuge16, "B".repeat(30) + "X" + "A".repeat(30), 74); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", dataStringHuge16, + "B".repeat(30) + "X" + "A".repeat(30)); + failCount++; + } + num++; + stubResult = indexOfKernel(dataStringHuge16, "A".repeat(32) + "F" + "B".repeat(32), 64); + nResult = naiveFind(dataStringHuge16, "A".repeat(32) + "F" + "B".repeat(32), 64); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", dataStringHuge16, + "A".repeat(32) + "F" + "B".repeat(32)); + failCount++; + } + num++; + stubResult = indexOfKernel(midMatchString16, dataStringHuge, 3); + nResult = naiveFind(midMatchString16, dataStringHuge, 3); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", midMatchString16, dataStringHuge); + failCount++; + } + num++; + stubResult = indexOfKernel(dataStringHuge16, "A".repeat(32) + "B".repeat(30) + "bB"); + nResult = naiveFind(dataStringHuge16, "A".repeat(32) + "B".repeat(30) + "bB"); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", dataStringHuge16, + "A".repeat(32) + "B".repeat(30) + "bB"); + failCount++; + } + num++; + stubResult = indexOfKernel(dataStringHuge16, earlyMatchString16); + nResult = naiveFind(dataStringHuge16, earlyMatchString16); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", dataStringHuge16, earlyMatchString16); + failCount++; + } + num++; + stubResult = indexOfKernel(dataStringHuge16, midMatchString16); + nResult = naiveFind(dataStringHuge16, midMatchString16); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", dataStringHuge16, midMatchString16); + failCount++; + } + num++; + stubResult = indexOfKernel(dataStringHuge16, lateMatchString16); + nResult = naiveFind(dataStringHuge16, lateMatchString16); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", dataStringHuge16, lateMatchString16); + failCount++; + } + num++; + stubResult = indexOfKernel(dataStringHuge16, searchNoMatch16); + nResult = naiveFind(dataStringHuge16, searchNoMatch16); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", dataStringHuge16, searchNoMatch16); + failCount++; + } + num++; + stubResult = indexOfKernel(searchString16, searchString16); + nResult = naiveFind(searchString16, searchString16); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", searchString16, searchString16); + failCount++; + } + num++; + stubResult = indexOfKernel(dataString16, searchString16); + nResult = naiveFind(dataString16, searchString16); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", dataString16, searchString16); + failCount++; + } + num++; + stubResult = indexOfKernel(dataString16, searchStringSmall16); + nResult = naiveFind(dataString16, searchStringSmall16); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", dataString16, searchStringSmall16); + failCount++; + } + num++; + stubResult = indexOfKernel(dataStringHuge16, "B".repeat(30) + "X" + "A".repeat(30), 74); + nResult = naiveFind(dataStringHuge16, "B".repeat(30) + "X" + "A".repeat(30), 74); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", dataStringHuge16, + "B".repeat(30) + "X" + "A".repeat(30)); + failCount++; + } + num++; + stubResult = indexOfKernel(dataStringHuge16, "A".repeat(32) + "\u01ef" + "B".repeat(32), 64); + nResult = naiveFind(dataStringHuge16, "A".repeat(32) + "\u01ef" + "B".repeat(32), 64); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", dataStringHuge16, + "A".repeat(32) + "\u01ef" + "B".repeat(32)); + failCount++; + } + num++; + stubResult = indexOfKernel(midMatchString16, dataStringHuge16, 3); + nResult = naiveFind(midMatchString16, dataStringHuge16, 3); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", midMatchString16, dataStringHuge16); + failCount++; + } + num++; + stubResult = indexOfKernel(dataStringHuge16, "A".repeat(32) + "B".repeat(30) + "\u01eeB"); + nResult = naiveFind(dataStringHuge16, "A".repeat(32) + "B".repeat(30) + "\u01eeB"); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofHuge", dataStringHuge16, + "A".repeat(32) + "B".repeat(30) + "\u01eeB"); + failCount++; + } + num++; + } + + report("StringIndexofHuge ", failCount); + } + + ///////////////////////////////////////////////////////////////////// + // + // From StringIndexof + private static void StringIndexof() { + int stubResult = 0; + int failCount = 0; + + for (int xx = 0; xx < 2; xx++) { + int num = 1; + + String dataString = "ngdfilsoscargfdgf"; + String searchString = "oscar"; + String dataStringBig = "2937489745890797905764956790452976742965790437698498409583479067ngdcapaapapapasdkajdlkajskldjaslkjdlkasjdsalkjas"; + String searchStringBig = "capaapapapasdkajdlkajskldjaslkjdlkasjdsalk"; + String data = "0000100101010010110101010010101110101001110110101010010101010010000010111010101010101010100010010101110111010101101010100010010100001010111111100001010101001010100001010101001010101010111010010101010101010101010101010"; + String sub = "10101010"; + String shortSub1 = "1"; + String data2 = "00001001010100a10110101010010101110101001110110101010010101010010000010111010101010101010a100010010101110111010101101010100010010a100a0010101111111000010101010010101000010101010010101010101110a10010101010101010101010101010"; + String shortSub2 = "a"; + char searchChar = 's'; + + String string16Short = "scar\u01fe1"; + String string16Medium = "capaapapapasdkajdlkajskldjaslkjdlkasjdsalksca1r\u01fescar"; + String string16Long = "2937489745890797905764956790452976742965790437698498409583479067ngdcapaapapapasdkajdlkajskldjaslkjdlkasjdsalkja1sscar\u01fescar"; + char searchChar16 = 0x1fe; + String searchString16 = "\u01fe"; + + stubResult = indexOfKernel(dataStringBig, searchChar); + int nResult = naiveFind(dataStringBig, searchChar); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexof", dataStringBig, searchChar); + failCount++; + } + num++; + stubResult = indexOfKernel(searchStringBig, searchChar); + nResult = naiveFind(searchStringBig, searchChar); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexof", searchStringBig, searchChar); + failCount++; + } + num++; + stubResult = indexOfKernel(searchString, searchChar); + nResult = naiveFind(searchString, searchChar); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexof", searchString, searchChar); + failCount++; + } + num++; + stubResult = indexOfKernel(string16Long, searchChar16); + nResult = naiveFind(string16Long, searchChar16); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexof", string16Long, searchChar16); + failCount++; + } + num++; + stubResult = indexOfKernel(string16Medium, searchChar16); + nResult = naiveFind(string16Medium, searchChar16); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexof", string16Medium, searchChar16); + failCount++; + } + num++; + stubResult = indexOfKernel(string16Short, searchChar16); + nResult = naiveFind(string16Short, searchChar16); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexof", string16Short, searchChar16); + failCount++; + } + num++; + stubResult = indexOfKernel(dataStringBig, searchChar, 3); + nResult = naiveFind(dataStringBig, searchChar, 3); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexof", dataStringBig, searchChar); + failCount++; + } + num++; + stubResult = indexOfKernel(searchStringBig, searchChar, 3); + nResult = naiveFind(searchStringBig, searchChar, 3); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexof", searchStringBig, searchChar); + failCount++; + } + num++; + stubResult = indexOfKernel(searchString, searchChar, 1); + nResult = naiveFind(searchString, searchChar, 1); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexof", searchString, searchChar); + failCount++; + } + num++; + stubResult = indexOfKernel(string16Long, searchChar16, 3); + nResult = naiveFind(string16Long, searchChar16, 3); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexof", string16Long, searchChar16); + failCount++; + } + num++; + stubResult = indexOfKernel(string16Medium, searchChar16, 3); + nResult = naiveFind(string16Medium, searchChar16, 3); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexof", string16Medium, searchChar16); + failCount++; + } + num++; + stubResult = indexOfKernel(string16Short, searchChar16, 2); + nResult = naiveFind(string16Short, searchChar16, 2); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexof", string16Short, searchChar16); + failCount++; + } + num++; + stubResult = indexOfKernel(string16Long, shortSub1); + nResult = naiveFind(string16Long, shortSub1); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexof", string16Long, shortSub1); + failCount++; + } + num++; + stubResult = indexOfKernel(string16Medium, shortSub1); + nResult = naiveFind(string16Medium, shortSub1); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexof", string16Medium, shortSub1); + failCount++; + } + num++; + stubResult = indexOfKernel(string16Long, shortSub2); + nResult = naiveFind(string16Long, shortSub2); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexof", string16Long, shortSub2); + failCount++; + } + num++; + stubResult = indexOfKernel(string16Long, shortSub1, 3); + nResult = naiveFind(string16Long, shortSub1, 3); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexof", string16Long, shortSub1); + failCount++; + } + num++; + stubResult = indexOfKernel(string16Medium, shortSub1, 3); + nResult = naiveFind(string16Medium, shortSub1, 3); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexof", string16Medium, shortSub1); + failCount++; + } + num++; + stubResult = indexOfKernel(string16Short, shortSub2, 1); + nResult = naiveFind(string16Short, shortSub2, 1); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexof", string16Short, shortSub2); + failCount++; + } + num++; + stubResult = indexOfKernel(string16Long, searchString16, 3); + nResult = naiveFind(string16Long, searchString16, 3); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexof", string16Long, searchString16); + failCount++; + } + num++; + stubResult = indexOfKernel(string16Medium, searchString16, 3); + nResult = naiveFind(string16Medium, searchString16, 3); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexof", string16Medium, searchString16); + failCount++; + } + num++; + stubResult = indexOfKernel(string16Short, searchString16, 2); + nResult = naiveFind(string16Short, searchString16, 2); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexof", string16Short, searchString16); + failCount++; + } + num++; + stubResult = indexOfKernel(string16Long, searchString16); + nResult = naiveFind(string16Long, searchString16); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexof", string16Long, searchString16); + failCount++; + } + num++; + stubResult = indexOfKernel(string16Medium, searchString16); + nResult = naiveFind(string16Medium, searchString16); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexof", string16Medium, searchString16); + failCount++; + } + num++; + stubResult = indexOfKernel(string16Short, searchString16); + nResult = naiveFind(string16Short, searchString16); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexof", string16Short, searchString16); + failCount++; + } + num++; + stubResult = indexOfKernel(dataString, searchString, 2); + nResult = naiveFind(dataString, searchString, 2); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexof", dataString, searchString); + failCount++; + } + num++; + stubResult = indexOfKernel(dataStringBig, searchStringBig, 2); + nResult = naiveFind(dataStringBig, searchStringBig, 2); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexof", dataStringBig, searchStringBig); + } + { + int index = 0; + int dummy = 0; + while ((index = indexOfKernel(data, sub, index)) > -1) { + nResult = naiveFind(data, sub, index); + if (index != nResult) { + PrintError(stubResult, nResult, num, "StringIndexof", data, sub); + failCount++; + } + index++; + dummy += index; + } + num++; + } + { + int dummy = 0; + int index = 0; + while ((index = indexOfKernel(data, shortSub1, index)) > -1) { + nResult = naiveFind(data, shortSub1, index); + if (index != nResult) { + PrintError(stubResult, nResult, num, "StringIndexof", data, shortSub1); + failCount++; + } + index++; + dummy += index; + } + num++; + } + { + int dummy = 0; + int index = 0; + while ((index = indexOfKernel(data2, shortSub2, index)) > -1) { + nResult = naiveFind(data2, shortSub2, index); + if (index != nResult) { + PrintError(stubResult, nResult, num, "StringIndexof", data2, shortSub2); + failCount++; + } + index++; + dummy += index; + } + num++; + } + { + String tmp = "simple-hash:SHA-1/UTF-8"; + if (!tmp.contains("SHA-1")) { + PrintError(stubResult, nResult, num, "StringIndexof", "simple-hash:SHA-1/UTF-8", "SHA-1"); + failCount++; + } + num++; + } + } + + report("StringIndexof ", failCount); + } + + ///////////////////////////////////////////////////////////////////// + // + // From StringIndexofChar + private static void StringIndexofChar() { + int stubResult = 0; + int failCount = 0; + + for (int xx = 0; xx < 2; xx++) { + stubResult = 0; + int nResult = 0; + int num = 1; + + String[] latn1_short = new String[100]; + String[] latn1_sse4 = new String[100]; + String[] latn1_avx2 = new String[100]; + String[] latn1_mixedLength = new String[100]; + String[] utf16_short = new String[100]; + String[] utf16_sse4 = new String[100]; + String[] utf16_avx2 = new String[100]; + String[] utf16_mixedLength = new String[100]; + + for (int i = 0; i < 100; i++) { + latn1_short[i] = makeRndString(false, 15); + latn1_sse4[i] = makeRndString(false, 16); + latn1_avx2[i] = makeRndString(false, 32); + utf16_short[i] = makeRndString(true, 7); + utf16_sse4[i] = makeRndString(true, 8); + utf16_avx2[i] = makeRndString(true, 16); + latn1_mixedLength[i] = makeRndString(false, rng.nextInt(65)); + utf16_mixedLength[i] = makeRndString(true, rng.nextInt(65)); + } + for (String what : latn1_mixedLength) { + stubResult = indexOfKernel(what, 'a'); + nResult = naiveFind(what, 'a'); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofChar", what, 'a'); + failCount++; + } + } + num++; + for (String what : utf16_mixedLength) { + stubResult = indexOfKernel(what, 'a'); + nResult = naiveFind(what, 'a'); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofChar", what, 'a'); + failCount++; + } + } + num++; + for (String what : latn1_mixedLength) { + stubResult = indexOfKernel(what, "a"); + nResult = naiveFind(what, "a"); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofChar", what, "a"); + failCount++; + } + } + num++; + for (String what : utf16_mixedLength) { + stubResult = indexOfKernel(what, "a"); + nResult = naiveFind(what, "a"); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofChar", what, "a"); + failCount++; + } + } + num++; + for (String what : latn1_short) { + stubResult = indexOfKernel(what, 'a'); + nResult = naiveFind(what, 'a'); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofChar", what, 'a'); + failCount++; + } + } + num++; + for (String what : latn1_sse4) { + stubResult = indexOfKernel(what, 'a'); + nResult = naiveFind(what, 'a'); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofChar", what, 'a'); + failCount++; + } + } + num++; + for (String what : latn1_avx2) { + stubResult = indexOfKernel(what, 'a'); + nResult = naiveFind(what, 'a'); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofChar", what, 'a'); + failCount++; + } + } + num++; + for (String what : utf16_short) { + stubResult = indexOfKernel(what, 'a'); + nResult = naiveFind(what, 'a'); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofChar", what, 'a'); + failCount++; + } + } + num++; + for (String what : utf16_sse4) { + stubResult = indexOfKernel(what, 'a'); + nResult = naiveFind(what, 'a'); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofChar", what, 'a'); + failCount++; + } + } + num++; + for (String what : utf16_avx2) { + stubResult = indexOfKernel(what, 'a'); + nResult = naiveFind(what, 'a'); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofChar", what, 'a'); + failCount++; + } + } + num++; + for (String what : latn1_short) { + stubResult = indexOfKernel(what, "a"); + nResult = naiveFind(what, "a"); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofChar", what, "a"); + failCount++; + } + } + num++; + for (String what : latn1_sse4) { + stubResult = indexOfKernel(what, "a"); + nResult = naiveFind(what, "a"); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofChar", what, "a"); + failCount++; + } + } + num++; + for (String what : latn1_avx2) { + stubResult = indexOfKernel(what, "a"); + nResult = naiveFind(what, "a"); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofChar", what, "a"); + failCount++; + } + } + num++; + for (String what : utf16_short) { + stubResult = indexOfKernel(what, "a"); + nResult = naiveFind(what, "a"); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofChar", what, "a"); + failCount++; + } + } + num++; + for (String what : utf16_sse4) { + stubResult = indexOfKernel(what, "a"); + nResult = naiveFind(what, "a"); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofChar", what, "a"); + failCount++; + } + } + num++; + for (String what : utf16_avx2) { + stubResult = indexOfKernel(what, "a"); + nResult = naiveFind(what, "a"); + if (nResult != stubResult) { + PrintError(stubResult, nResult, num, "StringIndexofChar", what, "a"); + failCount++; + } + } + num++; + } + + report("StringIndexofChar ", failCount); + } + +} diff --git a/test/micro/org/openjdk/bench/java/lang/StringIndexOfHuge.java b/test/micro/org/openjdk/bench/java/lang/StringIndexOfHuge.java new file mode 100644 index 00000000000..cac86bcc17f --- /dev/null +++ b/test/micro/org/openjdk/bench/java/lang/StringIndexOfHuge.java @@ -0,0 +1,273 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package org.openjdk.bench.java.lang; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; + +import java.util.concurrent.TimeUnit; + +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@State(Scope.Thread) +@Warmup(iterations = 5, time = 1) +@Measurement(iterations = 5, time = 1) +@Fork(value = 3) +public class StringIndexOfHuge { + + private String dataString; + private String dataString16; + private String dataStringHuge; + private String dataStringHuge16; + private String earlyMatchString; + private String earlyMatchString16; + private String midMatchString; + private String midMatchString16; + private String lateMatchString; + private String lateMatchString16; + + private String searchString; + private String searchString16; + private String searchStringSmall; + private String searchStringSmall16; + + private String searchStringHuge; + private String searchStringHuge16; + + private String searchNoMatch; + private String searchNoMatch16; + + private String Amdahl_1; + private String Amdahl_2; + private String Amdahl_3; + private String Amdahl_4; + private String Amdahl_5; + private String Amdahl_6; + + @Setup + public void setup() { + dataString = "ngdflsoscargfdgf"; + dataString16 = "ngdfilso\u01facargfd\u01eef"; + dataStringHuge = (("A".repeat(32) + "B".repeat(32)).repeat(16) + "X").repeat(2) + "bB"; + dataStringHuge16 = "\u01de" + (("A".repeat(32) + "B".repeat(32)).repeat(16) + "\u01fe").repeat(2) + "\u01eeB"; + earlyMatchString = dataStringHuge.substring(0, 34); + earlyMatchString16 = dataStringHuge16.substring(0, 34); + midMatchString = dataStringHuge.substring(dataStringHuge.length() / 2 - 16, dataStringHuge.length() / 2 + 17); + midMatchString16 = dataStringHuge16.substring(dataStringHuge16.length() / 2 - 16, dataStringHuge16.length() / 2 + 17); + lateMatchString = dataStringHuge.substring(dataStringHuge.length() - 31); + lateMatchString16 = dataStringHuge16.substring(dataStringHuge16.length() - 31); + + searchString = "oscar"; + searchString16 = "o\u01facar"; + searchStringSmall = "dgf"; + searchStringSmall16 = "d\u01eef"; + + searchStringHuge = "capaapapapasdkajdlkajskldjaslkajdlkajskldjaslkjdlkasjdsalk"; + searchStringHuge16 = "capaapapapasdkajdlka\u01feskldjaslkajdlkajskldjaslkjdlkasjdsalk"; + + searchNoMatch = "XYXyxYxy".repeat(22); + searchNoMatch16 = "\u01ab\u01ba\u01cb\u01bc\u01de\u01ed\u01fa\u01af".repeat(22); + + Amdahl_1 = "B".repeat(30) + "X" + "A".repeat(30); + Amdahl_2 = "A".repeat(32) + "F" + "B".repeat(32); + Amdahl_3 = "A".repeat(32) + "B".repeat(32) + "XbB"; + Amdahl_4 = "B".repeat(30) + "\u01ef" + "A".repeat(30); + Amdahl_5 = "A".repeat(32) + "\u01ef" + "B".repeat(32); + Amdahl_6 = "A".repeat(32) + "B".repeat(32) + "\u01fe\u01eeB"; + } + + + /** IndexOf Micros */ + @Benchmark + public int searchHugeEarlyMatch() { + return dataStringHuge.indexOf(earlyMatchString); + } + + @Benchmark + public int searchHugeMiddleMatch() { + return dataStringHuge.indexOf(midMatchString); + } + + @Benchmark + public int searchHugeLateMatch() { + return dataStringHuge.indexOf(lateMatchString); + } + + @Benchmark + public int searchHugeNoMatch() { + return dataStringHuge.indexOf(searchNoMatch); + } + + @Benchmark + public int searchSmallEarlyMatch() { + return searchString.indexOf(searchString); + } + + @Benchmark + public int searchSmallMidMatch() { + return dataString.indexOf(searchString); + } + + @Benchmark + public int searchSmallLateMatch() { + return dataString.indexOf(searchStringSmall); + } + + @Benchmark + public int searchHugeLargeSubstring() { + return dataStringHuge.indexOf(Amdahl_1, 74); + } + + @Benchmark + public int searchHugeLargeSubstringNoMatch() { + return dataStringHuge.indexOf(Amdahl_2, 64); + } + + @Benchmark + public int searchSubstringLongerThanString() { + return midMatchString.indexOf(dataStringHuge, 3); + } + + @Benchmark + public int searchHugeWorstCase() { + return dataStringHuge.indexOf(Amdahl_3); + } + + @Benchmark + public int search16HugeEarlyMatch() { + return dataStringHuge16.indexOf(earlyMatchString); + } + + @Benchmark + public int search16HugeMiddleMatch() { + return dataStringHuge16.indexOf(midMatchString); + } + + @Benchmark + public int search16HugeLateMatch() { + return dataStringHuge16.indexOf(lateMatchString); + } + + @Benchmark + public int search16HugeNoMatch() { + return dataStringHuge16.indexOf(searchNoMatch); + } + + @Benchmark + public int search16SmallEarlyMatch() { + return searchString16.indexOf(searchString); + } + + @Benchmark + public int search16SmallMidMatch() { + return dataString16.indexOf(searchString); + } + + @Benchmark + public int search16SmallLateMatch() { + return dataString16.indexOf(searchStringSmall); + } + + @Benchmark + public int search16HugeLargeSubstring() { + return dataStringHuge16.indexOf(Amdahl_1, 74); + } + + @Benchmark + public int search16HugeLargeSubstringNoMatch() { + return dataStringHuge16.indexOf(Amdahl_2, 64); + } + + @Benchmark + public int search16SubstringLongerThanString() { + return midMatchString16.indexOf(dataStringHuge, 3); + } + + @Benchmark + public int search16HugeWorstCase() { + return dataStringHuge16.indexOf(Amdahl_3); + } + + @Benchmark + public int search16HugeEarlyMatch16() { + return dataStringHuge16.indexOf(earlyMatchString16); + } + + @Benchmark + public int search16HugeMiddleMatch16() { + return dataStringHuge16.indexOf(midMatchString16); + } + + @Benchmark + public int search16HugeLateMatch16() { + return dataStringHuge16.indexOf(lateMatchString16); + } + + @Benchmark + public int search16HugeNoMatch16() { + return dataStringHuge16.indexOf(searchNoMatch16); + } + + @Benchmark + public int search16SmallEarlyMatch16() { + return searchString16.indexOf(searchString16); + } + + @Benchmark + public int search16SmallMidMatch16() { + return dataString16.indexOf(searchString16); + } + + @Benchmark + public int search16SmallLateMatch16() { + return dataString16.indexOf(searchStringSmall16); + } + + @Benchmark + public int search16HugeLargeSubstring16() { + return dataStringHuge16.indexOf(Amdahl_4, 74); + } + + @Benchmark + public int search16HugeLargeSubstringNoMatch16() { + return dataStringHuge16.indexOf(Amdahl_5, 64); + } + + @Benchmark + public int search16SubstringLongerThanString16() { + return midMatchString16.indexOf(dataStringHuge16, 3); + } + + @Benchmark + public int search16HugeWorstCase16() { + return dataStringHuge16.indexOf(Amdahl_6); + } +}