From 5b2e2e4695768a6bd8090fb9a6c342fcddcbb3fd Mon Sep 17 00:00:00 2001 From: Raffaello Giulietti Date: Fri, 3 Mar 2023 20:51:13 +0000 Subject: [PATCH] 8302590: Add String.indexOf(int ch, int fromIndex, int toIndex) Reviewed-by: rriggs, alanb --- .../share/classes/java/lang/String.java | 63 ++++- .../share/classes/java/lang/StringLatin1.java | 12 +- .../share/classes/java/lang/StringUTF16.java | 14 +- .../jdk/java/lang/String/IndexOfBeginEnd.java | 223 ++++++++++++++++++ 4 files changed, 295 insertions(+), 17 deletions(-) create mode 100644 test/jdk/java/lang/String/IndexOfBeginEnd.java diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java index 0f272eba893..ecebb77dd59 100644 --- a/src/java.base/share/classes/java/lang/String.java +++ b/src/java.base/share/classes/java/lang/String.java @@ -2439,10 +2439,69 @@ public final class String * character sequence represented by this object that is greater * than or equal to {@code fromIndex}, or {@code -1} * if the character does not occur. + * + * @apiNote + * Unlike {@link #substring(int)}, for example, this method does not throw + * an exception when {@code fromIndex} is outside the valid range. + * Rather, it returns -1 when {@code fromIndex} is larger than the length of + * the string. + * This result is, by itself, indistinguishable from a genuine absence of + * {@code ch} in the string. + * If stricter behavior is needed, {@link #indexOf(int, int, int)} + * should be considered instead. + * On a {@link String} {@code s}, for example, + * {@code s.indexOf(ch, fromIndex, s.length())} would throw if + * {@code fromIndex} were larger than the string length, or were negative. */ public int indexOf(int ch, int fromIndex) { - return isLatin1() ? StringLatin1.indexOf(value, ch, fromIndex) - : StringUTF16.indexOf(value, ch, fromIndex); + return isLatin1() ? 
StringLatin1.indexOf(value, ch, fromIndex, length()) + : StringUTF16.indexOf(value, ch, fromIndex, length()); + } + + /** + * Returns the index within this string of the first occurrence of the + * specified character, starting the search at {@code beginIndex} and + * stopping before {@code endIndex}. + * + *

+     * <p>If a character with value {@code ch} occurs in the
+     * character sequence represented by this {@code String}
+     * object at an index no smaller than {@code beginIndex} but smaller than
+     * {@code endIndex}, then
+     * the index of the first such occurrence is returned. For values
+     * of {@code ch} in the range from 0 to 0xFFFF (inclusive),
+     * this is the smallest value <i>k</i> such that:
+     * <blockquote><pre>
+     * (this.charAt(<i>k</i>) == ch) &amp;&amp; (beginIndex &lt;= <i>k</i> &lt; endIndex)
+     * </pre></blockquote>
+     * is true. For other values of {@code ch}, it is the
+     * smallest value <i>k</i> such that:
+     * <blockquote><pre>
+     * (this.codePointAt(<i>k</i>) == ch) &amp;&amp; (beginIndex &lt;= <i>k</i> &lt; endIndex)
+     * </pre></blockquote>
+     * is true. In either case, if no such character occurs in this
+     * string at or after position {@code beginIndex} and before position
+     * {@code endIndex}, then {@code -1} is returned.
+     *
+     * <p>

All indices are specified in {@code char} values + * (Unicode code units). + * + * @param ch a character (Unicode code point). + * @param beginIndex the index to start the search from (included). + * @param endIndex the index to stop the search at (excluded). + * @return the index of the first occurrence of the character in the + * character sequence represented by this object that is greater + * than or equal to {@code beginIndex} and less than {@code endIndex}, + * or {@code -1} if the character does not occur. + * @throws StringIndexOutOfBoundsException if {@code beginIndex} + * is negative, or {@code endIndex} is larger than the length of + * this {@code String} object, or {@code beginIndex} is larger than + * {@code endIndex}. + * @since 21 + */ + public int indexOf(int ch, int beginIndex, int endIndex) { + checkBoundsBeginEnd(beginIndex, endIndex, length()); + return isLatin1() ? StringLatin1.indexOf(value, ch, beginIndex, endIndex) + : StringUTF16.indexOf(value, ch, beginIndex, endIndex); } /** diff --git a/src/java.base/share/classes/java/lang/StringLatin1.java b/src/java.base/share/classes/java/lang/StringLatin1.java index e96e660728b..7c12e5711b3 100644 --- a/src/java.base/share/classes/java/lang/StringLatin1.java +++ b/src/java.base/share/classes/java/lang/StringLatin1.java @@ -192,18 +192,16 @@ final class StringLatin1 { }; } - public static int indexOf(byte[] value, int ch, int fromIndex) { + public static int indexOf(byte[] value, int ch, int fromIndex, int toIndex) { if (!canEncode(ch)) { return -1; } - int max = value.length; - if (fromIndex < 0) { - fromIndex = 0; - } else if (fromIndex >= max) { - // Note: fromIndex might be near -1>>>1. 
+ fromIndex = Math.max(fromIndex, 0); + toIndex = Math.min(toIndex, value.length); + if (fromIndex >= toIndex) { return -1; } - return indexOfChar(value, ch, fromIndex, max); + return indexOfChar(value, ch, fromIndex, toIndex); } @IntrinsicCandidate diff --git a/src/java.base/share/classes/java/lang/StringUTF16.java b/src/java.base/share/classes/java/lang/StringUTF16.java index c65435c0ac1..73d85863990 100644 --- a/src/java.base/share/classes/java/lang/StringUTF16.java +++ b/src/java.base/share/classes/java/lang/StringUTF16.java @@ -419,20 +419,18 @@ final class StringUTF16 { }; } - public static int indexOf(byte[] value, int ch, int fromIndex) { - int max = value.length >> 1; - if (fromIndex < 0) { - fromIndex = 0; - } else if (fromIndex >= max) { - // Note: fromIndex might be near -1>>>1. + public static int indexOf(byte[] value, int ch, int fromIndex, int toIndex) { + fromIndex = Math.max(fromIndex, 0); + toIndex = Math.min(toIndex, value.length >> 1); + if (fromIndex >= toIndex) { return -1; } if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) { // handle most cases here (ch is a BMP code point or a // negative value (invalid code point)) - return indexOfChar(value, ch, fromIndex, max); + return indexOfChar(value, ch, fromIndex, toIndex); } else { - return indexOfSupplementary(value, ch, fromIndex, max); + return indexOfSupplementary(value, ch, fromIndex, toIndex); } } diff --git a/test/jdk/java/lang/String/IndexOfBeginEnd.java b/test/jdk/java/lang/String/IndexOfBeginEnd.java new file mode 100644 index 00000000000..c231c035228 --- /dev/null +++ b/test/jdk/java/lang/String/IndexOfBeginEnd.java @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertThrows; + +/* + * @test + * @bug 8302590 + * @summary This one is for String.indexOf(int,int,int). + * @run testng IndexOfBeginEnd + */ + +public class IndexOfBeginEnd { + + private static final String STRING_EMPTY = ""; + private static final String STRING_L1 = "A"; + private static final String STRING_L2 = "AB"; + private static final String STRING_L4 = "ABCD"; + private static final String STRING_LLONG = "ABCDEFGH"; + private static final String STRING_U1 = "\uFF21"; + private static final String STRING_U2 = "\uFF21\uFF22"; + private static final String STRING_LDUPLICATE = "ABABABABAB"; + private static final String STRING_M11 = "A\uFF21"; + private static final String STRING_M12 = "\uFF21A"; + private static final String STRING_UDUPLICATE = "\uFF21\uFF22\uFF21\uFF22\uFF21\uFF22\uFF21\uFF22\uFF21\uFF22"; + private static final String STRING_SUPPLEMENTARY = "\uD801\uDC00\uD801\uDC01\uFF21A"; + private static final String STRING_MDUPLICATE1 = "\uFF21A\uFF21A\uFF21A\uFF21A\uFF21A"; + private static final String STRING_MDUPLICATE2 = "A\uFF21A\uFF21A\uFF21A\uFF21A\uFF21"; + + 
@DataProvider + public Object[][] results() { + return new Object[][] { + + new Object[] { STRING_EMPTY, (int) 'Z', 0, 0, -1 }, + + new Object[] { STRING_L1, (int) 'A', 0, 1, 0 }, + new Object[] { STRING_L1, (int) 'A', 1, 1, -1 }, + new Object[] { STRING_L1, (int) 'Z', 0, 1, -1 }, + + new Object[] { STRING_L2, (int) 'A', 0, 2, 0 }, + new Object[] { STRING_L2, (int) 'A', 0, 1, 0 }, + new Object[] { STRING_L2, (int) 'A', 1, 1, -1 }, + new Object[] { STRING_L2, (int) 'A', 1, 2, -1 }, + new Object[] { STRING_L2, (int) 'B', 0, 2, 1 }, + new Object[] { STRING_L2, (int) 'B', 0, 1, -1 }, + new Object[] { STRING_L2, (int) 'B', 1, 1, -1 }, + new Object[] { STRING_L2, (int) 'B', 1, 2, 1 }, + new Object[] { STRING_L2, (int) 'B', 2, 2, -1 }, + new Object[] { STRING_L2, (int) 'Z', 0, 2, -1 }, + + new Object[] { STRING_L4, (int) 'A', 0, 4, 0 }, + new Object[] { STRING_L4, (int) 'A', 0, 1, 0 }, + new Object[] { STRING_L4, (int) 'A', 1, 4, -1 }, + new Object[] { STRING_L4, (int) 'D', 0, 4, 3 }, + new Object[] { STRING_L4, (int) 'D', 0, 3, -1 }, + new Object[] { STRING_L4, (int) 'D', 3, 4, 3 }, + new Object[] { STRING_L4, (int) 'D', 4, 4, -1 }, + new Object[] { STRING_L4, (int) 'Z', 0, 4, -1 }, + + new Object[] { STRING_LLONG, (int) 'A', 0, 8, 0 }, + new Object[] { STRING_LLONG, (int) 'A', 0, 1, 0 }, + new Object[] { STRING_LLONG, (int) 'A', 1, 1, -1 }, + new Object[] { STRING_LLONG, (int) 'A', 1, 8, -1 }, + new Object[] { STRING_LLONG, (int) 'H', 0, 8, 7 }, + new Object[] { STRING_LLONG, (int) 'H', 0, 7, -1 }, + new Object[] { STRING_LLONG, (int) 'H', 7, 8, 7 }, + new Object[] { STRING_LLONG, (int) 'H', 8, 8, -1 }, + new Object[] { STRING_LLONG, (int) 'Z', 0, 8, -1 }, + + new Object[] { STRING_U1, (int) '\uFF21', 0, 1, 0 }, + new Object[] { STRING_U1, (int) '\uFF21', 0, 0, -1 }, + new Object[] { STRING_U1, (int) '\uFF21', 1, 1, -1 }, + new Object[] { STRING_U1, (int) 'A', 0, 1, -1 }, + + new Object[] { STRING_U2, (int) '\uFF21', 0, 2, 0 }, + new Object[] { STRING_U2, (int) 
'\uFF21', 0, 1, 0 }, + new Object[] { STRING_U2, (int) '\uFF21', 1, 2, -1 }, + new Object[] { STRING_U2, (int) '\uFF22', 0, 2, 1 }, + new Object[] { STRING_U2, (int) '\uFF22', 0, 1, -1 }, + new Object[] { STRING_U2, (int) '\uFF22', 1, 2, 1 }, + new Object[] { STRING_U2, (int) '\uFF22', 2, 2, -1 }, + new Object[] { STRING_U2, (int) '\uFF3A', 0, 2, -1 }, + + new Object[] { STRING_LDUPLICATE, (int) 'A', 0, 10, 0 }, + new Object[] { STRING_LDUPLICATE, (int) 'A', 1, 3, 2 }, + new Object[] { STRING_LDUPLICATE, (int) 'A', 3, 3, -1 }, + new Object[] { STRING_LDUPLICATE, (int) 'A', 3, 5, 4 }, + new Object[] { STRING_LDUPLICATE, (int) 'B', 0, 10, 1 }, + new Object[] { STRING_LDUPLICATE, (int) 'B', 2, 4, 3 }, + new Object[] { STRING_LDUPLICATE, (int) 'B', 4, 6, 5 }, + + new Object[] { STRING_M11, (int) 'A', 0, 2, 0 }, + new Object[] { STRING_M11, (int) 'A', 0, 1, 0 }, + new Object[] { STRING_M11, (int) 'A', 1, 2, -1 }, + new Object[] { STRING_M11, (int) 'A', 2, 2, -1 }, + new Object[] { STRING_M11, (int) '\uFF21', 0, 2, 1 }, + new Object[] { STRING_M11, (int) '\uFF21', 0, 1, -1 }, + new Object[] { STRING_M11, (int) '\uFF21', 1, 2, 1 }, + new Object[] { STRING_M11, (int) '\uFF21', 2, 2, -1 }, + new Object[] { STRING_M11, (int) '\uFF3A', 0, 2, -1 }, + + new Object[] { STRING_M12, (int) '\uFF21', 0, 2, 0 }, + new Object[] { STRING_M12, (int) '\uFF21', 0, 1, 0 }, + new Object[] { STRING_M12, (int) '\uFF21', 1, 2, -1 }, + new Object[] { STRING_M12, (int) '\uFF21', 2, 2, -1 }, + new Object[] { STRING_M12, (int) 'A', 0, 2, 1 }, + new Object[] { STRING_M12, (int) 'A', 0, 1, -1 }, + new Object[] { STRING_M12, (int) 'A', 1, 2, 1 }, + new Object[] { STRING_M12, (int) 'A', 2, 2, -1 }, + new Object[] { STRING_M12, (int) '\uFF3A', 0, 2, -1 }, + + new Object[] { STRING_UDUPLICATE, (int) '\uFF21', 0, 10, 0 }, + new Object[] { STRING_UDUPLICATE, (int) '\uFF21', 1, 3, 2 }, + new Object[] { STRING_UDUPLICATE, (int) '\uFF21', 3, 3, -1 }, + new Object[] { STRING_UDUPLICATE, (int) '\uFF21', 3, 5, 
4 }, + new Object[] { STRING_UDUPLICATE, (int) '\uFF22', 0, 10, 1 }, + new Object[] { STRING_UDUPLICATE, (int) '\uFF22', 2, 4, 3 }, + new Object[] { STRING_UDUPLICATE, (int) '\uFF22', 4, 6, 5 }, + + new Object[] { STRING_SUPPLEMENTARY, 'A', 0, 6, 5 }, + new Object[] { STRING_SUPPLEMENTARY, 'A', 2, 6, 5 }, + new Object[] { STRING_SUPPLEMENTARY, 'A', 2, 4, -1 }, + new Object[] { STRING_SUPPLEMENTARY, 'A', 4, 4, -1 }, + new Object[] { STRING_SUPPLEMENTARY, '\uFF21', 0, 6, 4 }, + new Object[] { STRING_SUPPLEMENTARY, '\uFF21', 2, 2, -1 }, + new Object[] { STRING_SUPPLEMENTARY, '\uFF21', 2, 6, 4 }, + new Object[] { STRING_SUPPLEMENTARY, '\uFF21', 2, 4, -1 }, + new Object[] { STRING_SUPPLEMENTARY, + Character.toCodePoint('\uD801', '\uDC00'), 0, 6, 0 }, + new Object[] { STRING_SUPPLEMENTARY, + Character.toCodePoint('\uD801', '\uDC00'), 0, 3, 0 }, + new Object[] { STRING_SUPPLEMENTARY, + Character.toCodePoint('\uD801', '\uDC00'), 0, 1, -1 }, + new Object[] { STRING_SUPPLEMENTARY, + Character.toCodePoint('\uD801', '\uDC00'), 1, 4, -1 }, + new Object[] { STRING_SUPPLEMENTARY, + Character.toCodePoint('\uD801', '\uDC01'), 0, 6, 2 }, + new Object[] { STRING_SUPPLEMENTARY, + Character.toCodePoint('\uD801', '\uDC01'), 2, 2, -1 }, + new Object[] { STRING_SUPPLEMENTARY, + Character.toCodePoint('\uD801', '\uDC01'), 2, 5, 2 }, + new Object[] { STRING_SUPPLEMENTARY, + Character.toCodePoint('\uD801', '\uDC01'), 2, 3, -1 }, + new Object[] { STRING_SUPPLEMENTARY, + Character.toCodePoint('\uD801', '\uDC01'), 3, 6, -1 }, + }; + } + + @DataProvider + public Object[][] exceptions() { + return new Object[][]{ + new Object[]{STRING_LDUPLICATE, 'A', -1, 0}, + new Object[]{STRING_LDUPLICATE, 'A', 0, 100}, + new Object[]{STRING_LDUPLICATE, 'A', -1, 100}, + new Object[]{STRING_LDUPLICATE, 'A', 3, 1}, + + new Object[]{STRING_UDUPLICATE, 'A', -1, 0}, + new Object[]{STRING_UDUPLICATE, 'A', 0, 100}, + new Object[]{STRING_UDUPLICATE, 'A', -1, 100}, + new Object[]{STRING_UDUPLICATE, 'A', 3, 1}, + + new 
Object[]{STRING_MDUPLICATE1, 'A', -1, 0}, + new Object[]{STRING_MDUPLICATE1, 'A', 0, 100}, + new Object[]{STRING_MDUPLICATE1, 'A', -1, 100}, + new Object[]{STRING_MDUPLICATE1, 'A', 3, 1}, + + new Object[]{STRING_MDUPLICATE2, 'A', -1, 0}, + new Object[]{STRING_MDUPLICATE2, 'A', 0, 100}, + new Object[]{STRING_MDUPLICATE2, 'A', -1, 100}, + new Object[]{STRING_MDUPLICATE2, 'A', 3, 1}, + }; + } + + @Test(dataProvider = "results") + public void testIndexOf(String str, int ch, int from, int to, int expected) { + assertEquals(str.indexOf(ch, from, to), expected, + String.format("testing String(%s).indexOf(%d,%d,%d)", + escapeNonASCIIs(str), ch, from, to)); + } + + @Test(dataProvider = "exceptions") + public void testIndexOf(String str, int ch, int from, int to) { + assertThrows(StringIndexOutOfBoundsException.class, + () -> str.indexOf(ch, from, to)); + } + + private static String escapeNonASCIIs(String s) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < s.length(); ++i) { + char c = s.charAt(i); + if (c >= 0x100) { + sb.append("\\u").append(Integer.toHexString(c)); + } else { + sb.append(c); + } + } + return sb.toString(); + } +}