From bffc8484c32ad6c3205f7cebe4e262a2dc9de57e Mon Sep 17 00:00:00 2001 From: Justin Lu Date: Wed, 26 Jun 2024 17:10:09 +0000 Subject: [PATCH] 8333755: NumberFormat integer only parsing breaks when format has suffix Reviewed-by: naoto --- .../java/text/CompactNumberFormat.java | 24 +--- .../classes/java/text/DecimalFormat.java | 80 +++++++---- .../share/classes/java/text/NumberFormat.java | 9 +- .../Format/NumberFormat/BigDecimalParse.java | 28 ++-- .../Format/NumberFormat/StrictParseTest.java | 129 +++++++++++++----- 5 files changed, 169 insertions(+), 101 deletions(-) diff --git a/src/java.base/share/classes/java/text/CompactNumberFormat.java b/src/java.base/share/classes/java/text/CompactNumberFormat.java index cb1a9546b12..25fd9923b06 100644 --- a/src/java.base/share/classes/java/text/CompactNumberFormat.java +++ b/src/java.base/share/classes/java/text/CompactNumberFormat.java @@ -1724,7 +1724,7 @@ public final class CompactNumberFormat extends NumberFormat { // Call DecimalFormat.subparseNumber() method to parse the // number part of the input text position = decimalFormat.subparseNumber(text, position, - digitList, false, false, status); + digitList, false, false, status).fullPos(); if (position == -1) { // Unable to parse the number successfully @@ -1732,26 +1732,6 @@ public final class CompactNumberFormat extends NumberFormat { pos.errorIndex = oldStart; return null; } - - // If parse integer only is true and the parsing is broken at - // decimal point, then pass/ignore all digits and move pointer - // at the start of suffix, to process the suffix part - if (isParseIntegerOnly() && position < text.length() - && text.charAt(position) == symbols.getDecimalSeparator()) { - position++; // Pass decimal character - for (; position < text.length(); ++position) { - char ch = text.charAt(position); - int digit = ch - symbols.getZeroDigit(); - if (digit < 0 || digit > 9) { - digit = Character.digit(ch, 10); - // Parse all digit characters - if (!(digit >= 0 && digit <= 9)) { - break; - } - } - } - } - // Number parsed successfully; match prefix and // suffix to obtain multiplier pos.index = position; @@ -2372,6 +2352,8 @@ public final class CompactNumberFormat extends NumberFormat { * parsed as the value {@code 1234000} (1234 (integer part) * 1000 * (thousand)) and the fractional part would be skipped. * The exact format accepted by the parse operation is locale dependent. + * @implSpec This implementation does not set the {@code ParsePosition} index + * to the position of the decimal symbol, but rather the end of the string. * * @return {@code true} if compact numbers should be parsed as integers * only; {@code false} otherwise diff --git a/src/java.base/share/classes/java/text/DecimalFormat.java b/src/java.base/share/classes/java/text/DecimalFormat.java index f575e86eced..1f249888a28 100644 --- a/src/java.base/share/classes/java/text/DecimalFormat.java +++ b/src/java.base/share/classes/java/text/DecimalFormat.java @@ -2150,10 +2150,7 @@ public class DecimalFormat extends NumberFormat { * #getGroupingSize()} is not adhered to *
  • {@link #isGroupingUsed()} returns {@code false}, and the grouping * symbol is found - *
  • {@link #isParseIntegerOnly()} returns {@code true}, and the decimal - * separator is found - *
  • {@link #isGroupingUsed()} returns {@code true} and {@link - * #isParseIntegerOnly()} returns {@code false}, and the grouping + *
  • {@link #isGroupingUsed()} returns {@code true} and the grouping * symbol occurs after the decimal separator *
  • Any other characters are found, that are not the expected symbols, * and are not digits that occur within the numerical portion @@ -2379,7 +2376,8 @@ public class DecimalFormat extends NumberFormat { // position will serve as new index when success, otherwise it will // serve as errorIndex when failure - position = subparseNumber(text, position, digits, true, isExponent, status); + NumericPosition pos = subparseNumber(text, position, digits, true, isExponent, status); + position = pos.fullPos; // First character after the prefix was un-parseable, should // fail regardless if lenient or strict. @@ -2422,9 +2420,15 @@ public class DecimalFormat extends NumberFormat { return false; } - // No failures, thus increment the index by the suffix - parsePosition.index = position + - (gotPositive ? positiveSuffix.length() : negativeSuffix.length()); + // When parsing integer only, index should be int pos + // If intPos is 0, the entire value was integer + if (isParseIntegerOnly() && pos.intPos > 0) { + parsePosition.index = pos.intPos; + } else { + // increment the index by the suffix + parsePosition.index = position + + (gotPositive ? positiveSuffix.length() : negativeSuffix.length()); + } } else { parsePosition.index = position; } @@ -2437,6 +2441,19 @@ public class DecimalFormat extends NumberFormat { return true; } + /** + * NumericPosition is a helper record class that stores two indices of interest. + * {@code fullPos} is either the first unparseable character or -1 in case + * of no valid number parsed. {@code intPos} reflects the position of + * a parsed decimal symbol, if one exists. When parsing with {@code isParseIntegerOnly()}, + * {@code fullPos} is used to match the suffix, and reset the {@code ParsePosition} + * index to {@code intPos}. + * + * @param fullPos an index that reflects the full traversal of the numerical String + * @param intPos an index that reflects the position of a parsed decimal symbol. + */ + record NumericPosition(int fullPos, int intPos) {} + /** * Parses a number from the given {@code text}. The text is parsed * beginning at {@code position}, until an unparseable character is seen. @@ -2449,14 +2466,15 @@ public class DecimalFormat extends NumberFormat { * @param status upon return contains boolean status flags indicating * whether the value is infinite and whether it is * positive - * @return returns the position of the first unparseable character or - * -1 in case of no valid number parsed + * @return returns a {@code NumericPosition} that stores both a full + * traversal index, and an int only index. */ - int subparseNumber(String text, int position, - DigitList digits, boolean checkExponent, - boolean isExponent, boolean[] status) { + NumericPosition subparseNumber(String text, int position, + DigitList digits, boolean checkExponent, + boolean isExponent, boolean[] status) { // process digits or Inf, find decimal position status[STATUS_INFINITE] = false; + int intIndex = 0; if (!isExponent && text.regionMatches(position, symbols.getInfinity(), 0, symbols.getInfinity().length())) { position += symbols.getInfinity().length(); @@ -2516,7 +2534,7 @@ public class DecimalFormat extends NumberFormat { if (parseStrict && isGroupingUsed() && position == startPos + groupingSize && prevSeparatorIndex == -groupingSize && !sawDecimal && digit >= 0 && digit <= 9) { - return position; + return new NumericPosition(position, intIndex); } if (digit == 0) { @@ -2538,37 +2556,44 @@ public class DecimalFormat extends NumberFormat { --digits.decimalAt; } else { ++digitCount; - digits.append((char)(digit + '0')); + if (!sawDecimal || !isParseIntegerOnly()) { + digits.append((char)(digit + '0')); + } } } else if (digit > 0 && digit <= 9) { // [sic] digit==0 handled above sawDigit = true; ++digitCount; - digits.append((char)(digit + '0')); + if (!sawDecimal || !isParseIntegerOnly()) { + digits.append((char) (digit + '0')); + } // Cancel out backup setting (see grouping handler below) backup = -1; } else if (!isExponent && ch == decimal) { // Check grouping size on decimal separator if (parseStrict && isGroupingViolation(position, prevSeparatorIndex)) { - return groupingViolationIndex(position, prevSeparatorIndex); + return new NumericPosition( + groupingViolationIndex(position, prevSeparatorIndex), intIndex); } // If we're only parsing integers, or if we ALREADY saw the // decimal, then don't parse this one. - if (isParseIntegerOnly() || sawDecimal) { + if (sawDecimal) { break; } + intIndex = position; digits.decimalAt = digitCount; // Not digits.count! sawDecimal = true; } else if (!isExponent && ch == grouping && isGroupingUsed()) { if (parseStrict) { // text should not start with grouping when strict if (position == startPos) { - return startPos; + return new NumericPosition(startPos, intIndex); } // when strict, fail if grouping occurs after decimal OR // current group violates grouping size if (sawDecimal || (isGroupingViolation(position, prevSeparatorIndex))) { - return groupingViolationIndex(position, prevSeparatorIndex); + return new NumericPosition( + groupingViolationIndex(position, prevSeparatorIndex), intIndex); } prevSeparatorIndex = position; // track previous } else { @@ -2621,7 +2646,8 @@ public class DecimalFormat extends NumberFormat { // "1,234%" and "1,234" both end with pos = 5, since '%' breaks // the loop before incrementing position. In both cases, check // should be done at pos = 4 - return groupingViolationIndex(position - 1, prevSeparatorIndex); + return new NumericPosition( + groupingViolationIndex(position - 1, prevSeparatorIndex), intIndex); } } @@ -2636,8 +2662,9 @@ public class DecimalFormat extends NumberFormat { digits.decimalAt = digitCount; // Not digits.count! } - // Adjust for exponent, if any - if (exponent != 0) { + // If parsing integer only, adjust exponent if it occurs + // in integer portion, otherwise ignore it + if (!sawDecimal || !isParseIntegerOnly()) { digits.decimalAt = shiftDecimalAt(digits.decimalAt, exponent); } @@ -2646,10 +2673,10 @@ public class DecimalFormat extends NumberFormat { // parse "$" with pattern "$#0.00". (return index 0 and error // index 1). if (!sawDigit && digitCount == 0) { - return -1; + return new NumericPosition(-1, intIndex); } } - return position; + return new NumericPosition(position, intIndex); } // Calculate the final decimal position based off the exponent value @@ -2917,7 +2944,8 @@ public class DecimalFormat extends NumberFormat { * have '{@code U+2030}'. * *

    Example: with multiplier 100, 1.23 is formatted as "123", and - * "123" is parsed into 1.23. + * "123" is parsed into 1.23. If {@code isParseIntegerOnly()} returns {@code true}, + * "123" is parsed into 1. * * @param newValue the new multiplier * @see #getMultiplier diff --git a/src/java.base/share/classes/java/text/NumberFormat.java b/src/java.base/share/classes/java/text/NumberFormat.java index 0409efc2da0..28f116042e9 100644 --- a/src/java.base/share/classes/java/text/NumberFormat.java +++ b/src/java.base/share/classes/java/text/NumberFormat.java @@ -468,12 +468,11 @@ public abstract class NumberFormat extends Format { } /** - * Returns true if this format will parse numbers as integers only. + * Returns {@code true} if this format will parse numbers as integers only. + * The {@code ParsePosition} index will be set to the position of the decimal + * symbol. The exact format accepted by the parse operation is locale dependent. * For example in the English locale, with ParseIntegerOnly true, the - * string "1234." would be parsed as the integer value 1234 and parsing - * would stop at the "." character. Of course, the exact format accepted - * by the parse operation is locale dependent and determined by sub-classes - * of NumberFormat. + * string "123.45" would be parsed as the integer value 123. * * @return {@code true} if numbers should be parsed as integers only; * {@code false} otherwise diff --git a/test/jdk/java/text/Format/NumberFormat/BigDecimalParse.java b/test/jdk/java/text/Format/NumberFormat/BigDecimalParse.java index 771ae761a3a..1d8168cd054 100644 --- a/test/jdk/java/text/Format/NumberFormat/BigDecimalParse.java +++ b/test/jdk/java/text/Format/NumberFormat/BigDecimalParse.java @@ -23,14 +23,18 @@ /* * @test - * @bug 4018937 8008577 8174269 + * @bug 4018937 8008577 8174269 8333755 * @summary Confirm that methods which are newly added to support BigDecimal and BigInteger work as expected. * @run junit/othervm BigDecimalParse */ import java.math.BigDecimal; -import java.text.*; -import java.util.*; +import java.text.DecimalFormat; +import java.text.Format; +import java.text.MessageFormat; +import java.text.NumberFormat; +import java.text.ParsePosition; +import java.util.Locale; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.BeforeAll; @@ -543,23 +547,23 @@ public class BigDecimalParse { static final int[][] parsePosition2 = { // {errorIndex, index} /* * Should keep in mind that the expected result is different from - * DecimalFormat.parse() for some cases. + * DecimalFormat.parse() for some cases. This is because parsing integer + * only will return a successful parse for the subformat, but since the index + * returned is not equal to the length, at the MessageFormat level, this + * will be interpreted as a failed parse, and so the DecimalFormat index + * should be reflected as the MessageFormat errorIndex. */ {28, 0}, // parsing stopped at '.' {29, 0}, // parsing stopped at '.' {29, 0}, // parsing stopped at '.' - {2, 0}, // parsing stopped at '(' because cannot find ')' - {2, 0}, // parsing stopped at the first numeric - // because cannot find '%' - {2, 0}, // parsing stopped at the first numeric - // because cannot find '%' + {30, 0}, // parsing stopped at '.' + {31, 0}, // parsing stopped at '.' + {32, 0}, // parsing stopped at '.' {28, 0}, // parsing stopped at '.' {29, 0}, // parsing stopped at '.' - {-1, 57}, {-1, 58}, {-1, 59}, {-1, 61}, {56, 0}, // parsing stopped at '.' - // because cannot find '%' - {2, 0}, // parsing stopped at '(' because cannot find ')' + {57, 0}, // parsing stopped at '.' {-1, 60}, {-1, 61}, {28, 0}, // parsing stopped at '.' {-1, 88}, diff --git a/test/jdk/java/text/Format/NumberFormat/StrictParseTest.java b/test/jdk/java/text/Format/NumberFormat/StrictParseTest.java index d62a867e573..333bc1b0506 100644 --- a/test/jdk/java/text/Format/NumberFormat/StrictParseTest.java +++ b/test/jdk/java/text/Format/NumberFormat/StrictParseTest.java @@ -23,7 +23,7 @@ /* * @test - * @bug 8327640 8331485 + * @bug 8327640 8331485 8333755 * @summary Test suite for NumberFormat parsing with strict leniency * @run junit/othervm -Duser.language=en -Duser.country=US StrictParseTest * @run junit/othervm -Duser.language=ja -Duser.country=JP StrictParseTest @@ -34,6 +34,7 @@ * @run junit/othervm -Duser.language=ar -Duser.country=AR StrictParseTest */ +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.condition.EnabledIfSystemProperty; import org.junit.jupiter.params.ParameterizedTest; @@ -72,17 +73,18 @@ public class StrictParseTest { private static final CompactNumberFormat cmpctFmt = (CompactNumberFormat) NumberFormat.getCompactNumberInstance(Locale.getDefault(), NumberFormat.Style.SHORT); + private static final NumberFormat[] FORMATS = new NumberFormat[]{dFmt, cFmt, pFmt, cmpctFmt}; - - // All NumberFormats should parse strictly - static { - dFmt.setStrict(true); - pFmt.setStrict(true); - cFmt.setStrict(true); - cmpctFmt.setStrict(true); - // To effectively test strict compactNumberFormat parsing - cmpctFmt.setParseIntegerOnly(false); - cmpctFmt.setGroupingUsed(true); + // Restore defaults before runs + @BeforeEach + void beforeEach() { + for (NumberFormat fmt : FORMATS) { + fmt.setStrict(true); + fmt.setParseIntegerOnly(false); + fmt.setGroupingUsed(true); + } + // Grouping Size is not defined at NumberFormat level + // Compact needs to manually init grouping size cmpctFmt.setGroupingSize(3); } @@ -114,6 +116,20 @@ public class StrictParseTest { failParse(nonLocalizedDFmt, "a123456,7890b", 6); } + + // 8333755: Check that parsing with integer only against a suffix value works + @Test // Non-localized, run once + @EnabledIfSystemProperty(named = "user.language", matches = "en") + public void integerOnlyParseWithSuffixTest() { + // Some pattern with a suffix + DecimalFormat fmt = new DecimalFormat("0.00b"); + fmt.setParseIntegerOnly(true); + assertEquals(5d, successParse(fmt, "5.55b", 1)); + assertEquals(5d, successParse(fmt, "5b", 2)); + assertEquals(5555d, successParse(fmt, "5,555.55b", 5)); + assertEquals(5d, successParse(fmt, "5.55E55b", 1)); + } + @Test // Non-localized, only run once @EnabledIfSystemProperty(named = "user.language", matches = "en") public void badExponentParseNumberFormatTest() { @@ -170,24 +186,30 @@ public class StrictParseTest { } else { successParse(dFmt, toParse); } - dFmt.setGroupingUsed(true); } - // Exception should be thrown if decimal separator occurs anywhere - // Don't pass badParseStrings for same reason as previous method. + // 8333755: Parsing behavior should follow normal strict behavior + // However the index returned, should be before decimal point + // and the value parsed equal to the integer portion @ParameterizedTest - @MethodSource({"validParseStrings", "integerOnlyParseStrings"}) - public void numFmtStrictIntegerOnlyUsed(String toParse) { - // When integer only is true, if a decimal separator is found, - // a failure should occur + @MethodSource("validIntegerOnlyParseStrings") + public void numFmtStrictIntegerOnlyUsedTest(String toParse, Number expVal) { dFmt.setParseIntegerOnly(true); - int failIndex = toParse.indexOf(dfs.getDecimalSeparator()); - if (failIndex > -1) { - failParse(dFmt, toParse, failIndex); + int expectedIndex = toParse.indexOf(dfs.getDecimalSeparator()); + if (expectedIndex > -1) { + assertEquals(successParse(dFmt, toParse, expectedIndex), expVal); } else { - successParse(dFmt, toParse); + assertEquals(successParse(dFmt, toParse), expVal); } - dFmt.setParseIntegerOnly(false); + } + + // 8333755: Parsing behavior should follow normal strict behavior + // when it comes to failures. + @ParameterizedTest + @MethodSource("badParseStrings") + public void numFmtStrictIntegerOnlyUsedFailTest(String toParse, int expectedErrorIndex) { + dFmt.setParseIntegerOnly(true); + failParse(dFmt, toParse, expectedErrorIndex); } // ---- CurrencyFormat tests ---- @@ -270,6 +292,18 @@ public class StrictParseTest { failParse(cnf, "c1bb", 2); } + @ParameterizedTest + @MethodSource({"validIntegerOnlyParseStrings", "compactValidIntegerOnlyParseStrings"}) + @EnabledIfSystemProperty(named = "user.language", matches = "en") + public void compactFmtSuccessParseIntOnlyTest(String toParse, double expectedValue) { + // compact does not accept exponents + if (toParse.indexOf('E') > -1) { + return; + } + cmpctFmt.setParseIntegerOnly(true); + assertEquals(expectedValue, successParse(cmpctFmt, toParse, toParse.length())); + } + // Ensure that on failure, the original index of the PP remains the same @Test public void parsePositionIndexTest() { @@ -279,7 +313,13 @@ public class StrictParseTest { // ---- Helper test methods ---- // Should parse entire String successfully, and return correctly parsed value. - private double successParse(NumberFormat fmt, String toParse) { + private Number successParse(NumberFormat fmt, String toParse) { + return successParse(fmt, toParse, toParse.length()); + } + + // Overloaded method that allows for an expected ParsePosition index value + // that is not the string length. + private Number successParse(NumberFormat fmt, String toParse, int expectedIndex) { // For Strings that don't have grouping separators, we test them with // grouping off so that they do not fail under the expectation that // grouping symbols should occur @@ -292,7 +332,7 @@ public class StrictParseTest { assertDoesNotThrow(() -> fmt.parse(toParse, pp)); assertEquals(-1, pp.getErrorIndex(), "ParsePosition ErrorIndex is not in correct location"); - assertEquals(toParse.length(), pp.getIndex(), + assertEquals(expectedIndex, pp.getIndex(), "ParsePosition Index is not in correct location"); fmt.setGroupingUsed(true); return parsedValue.doubleValue(); @@ -388,7 +428,9 @@ public class StrictParseTest { Arguments.of("1.a", 2), Arguments.of(".22a", 3), Arguments.of(".1a1", 2), - Arguments.of("1,234,a", 5)) + Arguments.of("1,234,a", 5), + // Double decimal + Arguments.of("1,234..5", 5)) .map(args -> Arguments.of( localizeText(String.valueOf(args.get()[0])), args.get()[1])); } @@ -397,6 +439,8 @@ public class StrictParseTest { // Given as Arguments private static Stream validParseStrings() { return Stream.of( + Arguments.of("1,234.55", 1234.55d), + Arguments.of("1,234.5", 1234.5d), Arguments.of("1,234.00", 1234d), Arguments.of("1,234.0", 1234d), Arguments.of("1,234.", 1234d), @@ -414,17 +458,19 @@ public class StrictParseTest { localizeText(String.valueOf(args.get()[0])), args.get()[1])); } - // Separate test data set for integer only. Can not use "badParseStrings", as - // there is test data where the failure may occur from some other issue, - // not related to grouping - private static Stream integerOnlyParseStrings() { + // Separate test data set for integer only. + // Valid parse strings, that would parse successfully for integer/non-integer parse + private static Stream validIntegerOnlyParseStrings() { return Stream.of( - Arguments.of("234.a"), - Arguments.of("234.a1"), - Arguments.of("234.1"), - Arguments.of("234.1a"), - Arguments.of("234.")) - .map(args -> Arguments.of(localizeText(String.valueOf(args.get()[0])))); + Arguments.of("234", 234d), + Arguments.of("234.", 234d), + Arguments.of("234.1", 234d), + Arguments.of("1,234.1", 1234d), + Arguments.of("234.12345", 234d), + Arguments.of("234.543E23", 234d), + Arguments.of("234,000.55E22", 234000d), + Arguments.of("234E22", 234E22)) + .map(args -> Arguments.of(localizeText(String.valueOf(args.get()[0])), args.get()[1])); } // Separate test data set for no grouping. Can not use "badParseStrings", as @@ -514,6 +560,12 @@ public class StrictParseTest { ); } + private static Stream compactValidIntegerOnlyParseStrings() { + return validIntegerOnlyParseStrings().map(args -> Arguments.of( + args.get()[0] + "K", (double) args.get()[1] * 1000) + ); + } + // Replace the grouping and decimal separators with localized variants // Used during localization of data private static String localizeText(String text) { @@ -526,7 +578,10 @@ public class StrictParseTest { sb.append(dfs.getGroupingSeparator()); } else if (c == '.') { sb.append(dfs.getDecimalSeparator()); - } else if (c == '0') { + } else if (c == 'E') { + sb.append(dfs.getExponentSeparator()); + } + else if (c == '0') { sb.append(dfs.getZeroDigit()); } else { sb.append(c);