8333755: NumberFormat integer only parsing breaks when format has suffix

Reviewed-by: naoto
This commit is contained in:
Justin Lu 2024-06-26 17:10:09 +00:00
parent b5d589623c
commit bffc8484c3
5 changed files with 169 additions and 101 deletions

View File

@ -1724,7 +1724,7 @@ public final class CompactNumberFormat extends NumberFormat {
// Call DecimalFormat.subparseNumber() method to parse the
// number part of the input text
position = decimalFormat.subparseNumber(text, position,
digitList, false, false, status);
digitList, false, false, status).fullPos();
if (position == -1) {
// Unable to parse the number successfully
@ -1732,26 +1732,6 @@ public final class CompactNumberFormat extends NumberFormat {
pos.errorIndex = oldStart;
return null;
}
// If parse integer only is true and the parsing is broken at
// decimal point, then pass/ignore all digits and move pointer
// at the start of suffix, to process the suffix part
if (isParseIntegerOnly() && position < text.length()
&& text.charAt(position) == symbols.getDecimalSeparator()) {
position++; // Pass decimal character
for (; position < text.length(); ++position) {
char ch = text.charAt(position);
int digit = ch - symbols.getZeroDigit();
if (digit < 0 || digit > 9) {
digit = Character.digit(ch, 10);
// Parse all digit characters
if (!(digit >= 0 && digit <= 9)) {
break;
}
}
}
}
// Number parsed successfully; match prefix and
// suffix to obtain multiplier
pos.index = position;
@ -2372,6 +2352,8 @@ public final class CompactNumberFormat extends NumberFormat {
* parsed as the value {@code 1234000} (1234 (integer part) * 1000
* (thousand)) and the fractional part would be skipped.
* The exact format accepted by the parse operation is locale dependent.
* @implSpec This implementation does not set the {@code ParsePosition} index
* to the position of the decimal symbol, but rather the end of the string.
*
* @return {@code true} if compact numbers should be parsed as integers
* only; {@code false} otherwise

View File

@ -2150,10 +2150,7 @@ public class DecimalFormat extends NumberFormat {
* #getGroupingSize()} is not adhered to
* <li> {@link #isGroupingUsed()} returns {@code false}, and the grouping
* symbol is found
* <li> {@link #isParseIntegerOnly()} returns {@code true}, and the decimal
* separator is found
* <li> {@link #isGroupingUsed()} returns {@code true} and {@link
* #isParseIntegerOnly()} returns {@code false}, and the grouping
* <li> {@link #isGroupingUsed()} returns {@code true} and the grouping
* symbol occurs after the decimal separator
* <li> Any other characters are found, that are not the expected symbols,
* and are not digits that occur within the numerical portion
@ -2379,7 +2376,8 @@ public class DecimalFormat extends NumberFormat {
// position will serve as new index when success, otherwise it will
// serve as errorIndex when failure
position = subparseNumber(text, position, digits, true, isExponent, status);
NumericPosition pos = subparseNumber(text, position, digits, true, isExponent, status);
position = pos.fullPos;
// First character after the prefix was un-parseable, should
// fail regardless if lenient or strict.
@ -2422,9 +2420,15 @@ public class DecimalFormat extends NumberFormat {
return false;
}
// No failures, thus increment the index by the suffix
parsePosition.index = position +
(gotPositive ? positiveSuffix.length() : negativeSuffix.length());
// When parsing integer only, index should be int pos
// If intPos is 0, the entire value was integer
if (isParseIntegerOnly() && pos.intPos > 0) {
parsePosition.index = pos.intPos;
} else {
// increment the index by the suffix
parsePosition.index = position +
(gotPositive ? positiveSuffix.length() : negativeSuffix.length());
}
} else {
parsePosition.index = position;
}
@ -2437,6 +2441,19 @@ public class DecimalFormat extends NumberFormat {
return true;
}
/**
* NumericPosition is a helper record class that stores two indices of interest.
* {@code fullPos} is either the first unparseable character or -1 in case
* of no valid number parsed. {@code intPos} reflects the position of
* a parsed decimal symbol, if one exists. When parsing with {@code isParseIntegerOnly()},
* {@code fullPos} is used to match the suffix, and reset the {@code ParsePosition}
* index to {@code intPos}.
*
* @param fullPos an index that reflects the full traversal of the numerical String
* @param intPos an index that reflects the position of a parsed decimal symbol.
*/
record NumericPosition(int fullPos, int intPos) {}
/**
* Parses a number from the given {@code text}. The text is parsed
* beginning at {@code position}, until an unparseable character is seen.
@ -2449,14 +2466,15 @@ public class DecimalFormat extends NumberFormat {
* @param status upon return contains boolean status flags indicating
* whether the value is infinite and whether it is
* positive
* @return returns the position of the first unparseable character or
* -1 in case of no valid number parsed
* @return returns a {@code NumericPosition} that stores both a full
* traversal index, and an int only index.
*/
int subparseNumber(String text, int position,
DigitList digits, boolean checkExponent,
boolean isExponent, boolean[] status) {
NumericPosition subparseNumber(String text, int position,
DigitList digits, boolean checkExponent,
boolean isExponent, boolean[] status) {
// process digits or Inf, find decimal position
status[STATUS_INFINITE] = false;
int intIndex = 0;
if (!isExponent && text.regionMatches(position, symbols.getInfinity(), 0,
symbols.getInfinity().length())) {
position += symbols.getInfinity().length();
@ -2516,7 +2534,7 @@ public class DecimalFormat extends NumberFormat {
if (parseStrict && isGroupingUsed() && position == startPos + groupingSize
&& prevSeparatorIndex == -groupingSize && !sawDecimal
&& digit >= 0 && digit <= 9) {
return position;
return new NumericPosition(position, intIndex);
}
if (digit == 0) {
@ -2538,37 +2556,44 @@ public class DecimalFormat extends NumberFormat {
--digits.decimalAt;
} else {
++digitCount;
digits.append((char)(digit + '0'));
if (!sawDecimal || !isParseIntegerOnly()) {
digits.append((char)(digit + '0'));
}
}
} else if (digit > 0 && digit <= 9) { // [sic] digit==0 handled above
sawDigit = true;
++digitCount;
digits.append((char)(digit + '0'));
if (!sawDecimal || !isParseIntegerOnly()) {
digits.append((char) (digit + '0'));
}
// Cancel out backup setting (see grouping handler below)
backup = -1;
} else if (!isExponent && ch == decimal) {
// Check grouping size on decimal separator
if (parseStrict && isGroupingViolation(position, prevSeparatorIndex)) {
return groupingViolationIndex(position, prevSeparatorIndex);
return new NumericPosition(
groupingViolationIndex(position, prevSeparatorIndex), intIndex);
}
// If we're only parsing integers, or if we ALREADY saw the
// decimal, then don't parse this one.
if (isParseIntegerOnly() || sawDecimal) {
if (sawDecimal) {
break;
}
intIndex = position;
digits.decimalAt = digitCount; // Not digits.count!
sawDecimal = true;
} else if (!isExponent && ch == grouping && isGroupingUsed()) {
if (parseStrict) {
// text should not start with grouping when strict
if (position == startPos) {
return startPos;
return new NumericPosition(startPos, intIndex);
}
// when strict, fail if grouping occurs after decimal OR
// current group violates grouping size
if (sawDecimal || (isGroupingViolation(position, prevSeparatorIndex))) {
return groupingViolationIndex(position, prevSeparatorIndex);
return new NumericPosition(
groupingViolationIndex(position, prevSeparatorIndex), intIndex);
}
prevSeparatorIndex = position; // track previous
} else {
@ -2621,7 +2646,8 @@ public class DecimalFormat extends NumberFormat {
// "1,234%" and "1,234" both end with pos = 5, since '%' breaks
// the loop before incrementing position. In both cases, check
// should be done at pos = 4
return groupingViolationIndex(position - 1, prevSeparatorIndex);
return new NumericPosition(
groupingViolationIndex(position - 1, prevSeparatorIndex), intIndex);
}
}
@ -2636,8 +2662,9 @@ public class DecimalFormat extends NumberFormat {
digits.decimalAt = digitCount; // Not digits.count!
}
// Adjust for exponent, if any
if (exponent != 0) {
// If parsing integer only, adjust exponent if it occurs
// in integer portion, otherwise ignore it
if (!sawDecimal || !isParseIntegerOnly()) {
digits.decimalAt = shiftDecimalAt(digits.decimalAt, exponent);
}
@ -2646,10 +2673,10 @@ public class DecimalFormat extends NumberFormat {
// parse "$" with pattern "$#0.00". (return index 0 and error
// index 1).
if (!sawDigit && digitCount == 0) {
return -1;
return new NumericPosition(-1, intIndex);
}
}
return position;
return new NumericPosition(position, intIndex);
}
// Calculate the final decimal position based off the exponent value
@ -2917,7 +2944,8 @@ public class DecimalFormat extends NumberFormat {
* have '{@code U+2030}'.
*
* <P>Example: with multiplier 100, 1.23 is formatted as "123", and
* "123" is parsed into 1.23.
* "123" is parsed into 1.23. If {@code isParseIntegerOnly()} returns {@code true},
* "123" is parsed into 1.
*
* @param newValue the new multiplier
* @see #getMultiplier

View File

@ -468,12 +468,11 @@ public abstract class NumberFormat extends Format {
}
/**
* Returns true if this format will parse numbers as integers only.
* Returns {@code true} if this format will parse numbers as integers only.
* The {@code ParsePosition} index will be set to the position of the decimal
* symbol. The exact format accepted by the parse operation is locale dependent.
* For example in the English locale, with ParseIntegerOnly true, the
* string "1234." would be parsed as the integer value 1234 and parsing
* would stop at the "." character. Of course, the exact format accepted
* by the parse operation is locale dependent and determined by sub-classes
* of NumberFormat.
* string "123.45" would be parsed as the integer value 123.
*
* @return {@code true} if numbers should be parsed as integers only;
* {@code false} otherwise

View File

@ -23,14 +23,18 @@
/*
* @test
* @bug 4018937 8008577 8174269
* @bug 4018937 8008577 8174269 8333755
* @summary Confirm that methods which are newly added to support BigDecimal and BigInteger work as expected.
* @run junit/othervm BigDecimalParse
*/
import java.math.BigDecimal;
import java.text.*;
import java.util.*;
import java.text.DecimalFormat;
import java.text.Format;
import java.text.MessageFormat;
import java.text.NumberFormat;
import java.text.ParsePosition;
import java.util.Locale;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.BeforeAll;
@ -543,23 +547,23 @@ public class BigDecimalParse {
static final int[][] parsePosition2 = { // {errorIndex, index}
/*
* Should keep in mind that the expected result is different from
* DecimalFormat.parse() for some cases.
* DecimalFormat.parse() for some cases. This is because parsing integer
* only will return a successful parse for the subformat, but since the index
* returned is not equal to the length, at the MessageFormat level, this
* will be interpreted as a failed parse, and so the DecimalFormat index
* should be reflected as the MessageFormat errorIndex.
*/
{28, 0}, // parsing stopped at '.'
{29, 0}, // parsing stopped at '.'
{29, 0}, // parsing stopped at '.'
{2, 0}, // parsing stopped at '(' because cannot find ')'
{2, 0}, // parsing stopped at the first numeric
// because cannot find '%'
{2, 0}, // parsing stopped at the first numeric
// because cannot find '%'
{30, 0}, // parsing stopped at '.'
{31, 0}, // parsing stopped at '.'
{32, 0}, // parsing stopped at '.'
{28, 0}, // parsing stopped at '.'
{29, 0}, // parsing stopped at '.'
{-1, 57}, {-1, 58}, {-1, 59}, {-1, 61},
{56, 0}, // parsing stopped at '.'
// because cannot find '%'
{2, 0}, // parsing stopped at '(' because cannot find ')'
{57, 0}, // parsing stopped at '.'
{-1, 60}, {-1, 61},
{28, 0}, // parsing stopped at '.'
{-1, 88},

View File

@ -23,7 +23,7 @@
/*
* @test
* @bug 8327640 8331485
* @bug 8327640 8331485 8333755
* @summary Test suite for NumberFormat parsing with strict leniency
* @run junit/othervm -Duser.language=en -Duser.country=US StrictParseTest
* @run junit/othervm -Duser.language=ja -Duser.country=JP StrictParseTest
@ -34,6 +34,7 @@
* @run junit/othervm -Duser.language=ar -Duser.country=AR StrictParseTest
*/
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.EnabledIfSystemProperty;
import org.junit.jupiter.params.ParameterizedTest;
@ -72,17 +73,18 @@ public class StrictParseTest {
private static final CompactNumberFormat cmpctFmt =
(CompactNumberFormat) NumberFormat.getCompactNumberInstance(Locale.getDefault(),
NumberFormat.Style.SHORT);
private static final NumberFormat[] FORMATS = new NumberFormat[]{dFmt, cFmt, pFmt, cmpctFmt};
// All NumberFormats should parse strictly
static {
dFmt.setStrict(true);
pFmt.setStrict(true);
cFmt.setStrict(true);
cmpctFmt.setStrict(true);
// To effectively test strict compactNumberFormat parsing
cmpctFmt.setParseIntegerOnly(false);
cmpctFmt.setGroupingUsed(true);
// Restore defaults before runs
@BeforeEach
void beforeEach() {
for (NumberFormat fmt : FORMATS) {
fmt.setStrict(true);
fmt.setParseIntegerOnly(false);
fmt.setGroupingUsed(true);
}
// Grouping Size is not defined at NumberFormat level
// Compact needs to manually init grouping size
cmpctFmt.setGroupingSize(3);
}
@ -114,6 +116,20 @@ public class StrictParseTest {
failParse(nonLocalizedDFmt, "a123456,7890b", 6);
}
// 8333755: Check that parsing with integer only against a suffix value works
@Test // Non-localized, run once
@EnabledIfSystemProperty(named = "user.language", matches = "en")
public void integerOnlyParseWithSuffixTest() {
// Some pattern with a suffix
DecimalFormat fmt = new DecimalFormat("0.00b");
fmt.setParseIntegerOnly(true);
assertEquals(5d, successParse(fmt, "5.55b", 1));
assertEquals(5d, successParse(fmt, "5b", 2));
assertEquals(5555d, successParse(fmt, "5,555.55b", 5));
assertEquals(5d, successParse(fmt, "5.55E55b", 1));
}
@Test // Non-localized, only run once
@EnabledIfSystemProperty(named = "user.language", matches = "en")
public void badExponentParseNumberFormatTest() {
@ -170,24 +186,30 @@ public class StrictParseTest {
} else {
successParse(dFmt, toParse);
}
dFmt.setGroupingUsed(true);
}
// Exception should be thrown if decimal separator occurs anywhere
// Don't pass badParseStrings for same reason as previous method.
// 8333755: Parsing behavior should follow normal strict behavior
// However the index returned, should be before decimal point
// and the value parsed equal to the integer portion
@ParameterizedTest
@MethodSource({"validParseStrings", "integerOnlyParseStrings"})
public void numFmtStrictIntegerOnlyUsed(String toParse) {
// When integer only is true, if a decimal separator is found,
// a failure should occur
@MethodSource("validIntegerOnlyParseStrings")
public void numFmtStrictIntegerOnlyUsedTest(String toParse, Number expVal) {
dFmt.setParseIntegerOnly(true);
int failIndex = toParse.indexOf(dfs.getDecimalSeparator());
if (failIndex > -1) {
failParse(dFmt, toParse, failIndex);
int expectedIndex = toParse.indexOf(dfs.getDecimalSeparator());
if (expectedIndex > -1) {
assertEquals(successParse(dFmt, toParse, expectedIndex), expVal);
} else {
successParse(dFmt, toParse);
assertEquals(successParse(dFmt, toParse), expVal);
}
dFmt.setParseIntegerOnly(false);
}
// 8333755: Parsing behavior should follow normal strict behavior
// when it comes to failures.
@ParameterizedTest
@MethodSource("badParseStrings")
public void numFmtStrictIntegerOnlyUsedFailTest(String toParse, int expectedErrorIndex) {
dFmt.setParseIntegerOnly(true);
failParse(dFmt, toParse, expectedErrorIndex);
}
// ---- CurrencyFormat tests ----
@ -270,6 +292,18 @@ public class StrictParseTest {
failParse(cnf, "c1bb", 2);
}
@ParameterizedTest
@MethodSource({"validIntegerOnlyParseStrings", "compactValidIntegerOnlyParseStrings"})
@EnabledIfSystemProperty(named = "user.language", matches = "en")
public void compactFmtSuccessParseIntOnlyTest(String toParse, double expectedValue) {
// compact does not accept exponents
if (toParse.indexOf('E') > -1) {
return;
}
cmpctFmt.setParseIntegerOnly(true);
assertEquals(expectedValue, successParse(cmpctFmt, toParse, toParse.length()));
}
// Ensure that on failure, the original index of the PP remains the same
@Test
public void parsePositionIndexTest() {
@ -279,7 +313,13 @@ public class StrictParseTest {
// ---- Helper test methods ----
// Should parse entire String successfully, and return correctly parsed value.
private double successParse(NumberFormat fmt, String toParse) {
private Number successParse(NumberFormat fmt, String toParse) {
return successParse(fmt, toParse, toParse.length());
}
// Overloaded method that allows for an expected ParsePosition index value
// that is not the string length.
private Number successParse(NumberFormat fmt, String toParse, int expectedIndex) {
// For Strings that don't have grouping separators, we test them with
// grouping off so that they do not fail under the expectation that
// grouping symbols should occur
@ -292,7 +332,7 @@ public class StrictParseTest {
assertDoesNotThrow(() -> fmt.parse(toParse, pp));
assertEquals(-1, pp.getErrorIndex(),
"ParsePosition ErrorIndex is not in correct location");
assertEquals(toParse.length(), pp.getIndex(),
assertEquals(expectedIndex, pp.getIndex(),
"ParsePosition Index is not in correct location");
fmt.setGroupingUsed(true);
return parsedValue.doubleValue();
@ -388,7 +428,9 @@ public class StrictParseTest {
Arguments.of("1.a", 2),
Arguments.of(".22a", 3),
Arguments.of(".1a1", 2),
Arguments.of("1,234,a", 5))
Arguments.of("1,234,a", 5),
// Double decimal
Arguments.of("1,234..5", 5))
.map(args -> Arguments.of(
localizeText(String.valueOf(args.get()[0])), args.get()[1]));
}
@ -397,6 +439,8 @@ public class StrictParseTest {
// Given as Arguments<String, expectedParsedNumber>
private static Stream<Arguments> validParseStrings() {
return Stream.of(
Arguments.of("1,234.55", 1234.55d),
Arguments.of("1,234.5", 1234.5d),
Arguments.of("1,234.00", 1234d),
Arguments.of("1,234.0", 1234d),
Arguments.of("1,234.", 1234d),
@ -414,17 +458,19 @@ public class StrictParseTest {
localizeText(String.valueOf(args.get()[0])), args.get()[1]));
}
// Separate test data set for integer only. Can not use "badParseStrings", as
// there is test data where the failure may occur from some other issue,
// not related to grouping
private static Stream<Arguments> integerOnlyParseStrings() {
// Separate test data set for integer only.
// Valid parse strings, that would parse successfully for integer/non-integer parse
private static Stream<Arguments> validIntegerOnlyParseStrings() {
return Stream.of(
Arguments.of("234.a"),
Arguments.of("234.a1"),
Arguments.of("234.1"),
Arguments.of("234.1a"),
Arguments.of("234."))
.map(args -> Arguments.of(localizeText(String.valueOf(args.get()[0]))));
Arguments.of("234", 234d),
Arguments.of("234.", 234d),
Arguments.of("234.1", 234d),
Arguments.of("1,234.1", 1234d),
Arguments.of("234.12345", 234d),
Arguments.of("234.543E23", 234d),
Arguments.of("234,000.55E22", 234000d),
Arguments.of("234E22", 234E22))
.map(args -> Arguments.of(localizeText(String.valueOf(args.get()[0])), args.get()[1]));
}
// Separate test data set for no grouping. Can not use "badParseStrings", as
@ -514,6 +560,12 @@ public class StrictParseTest {
);
}
private static Stream<Arguments> compactValidIntegerOnlyParseStrings() {
return validIntegerOnlyParseStrings().map(args -> Arguments.of(
args.get()[0] + "K", (double) args.get()[1] * 1000)
);
}
// Replace the grouping and decimal separators with localized variants
// Used during localization of data
private static String localizeText(String text) {
@ -526,7 +578,10 @@ public class StrictParseTest {
sb.append(dfs.getGroupingSeparator());
} else if (c == '.') {
sb.append(dfs.getDecimalSeparator());
} else if (c == '0') {
} else if (c == 'E') {
sb.append(dfs.getExponentSeparator());
}
else if (c == '0') {
sb.append(dfs.getZeroDigit());
} else {
sb.append(c);