8352628: Refine Grapheme test

Reviewed-by: jlu, joehw
This commit is contained in:
Naoto Sato 2025-03-24 16:15:45 +00:00
parent d8c2f59a1b
commit ddc4318053

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -49,7 +49,6 @@ import java.io.*;
import java.math.BigInteger;
import java.nio.CharBuffer;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.function.Function;
@ -62,8 +61,8 @@ import java.util.regex.PatternSyntaxException;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import org.testng.Assert;
import jdk.test.lib.RandomFactory;
@ -4148,87 +4147,85 @@ public class RegExTest {
Pattern.compile("(?imsducxU).(?-imsducxU).");
}
@Test
public static void grapheme() throws Exception {
final int[] lineNumber = new int[1];
Stream.concat(Files.lines(UCDFiles.GRAPHEME_BREAK_TEST),
@DataProvider
private static String[] graphemeTestCases() throws Exception {
return Stream.concat(Files.lines(UCDFiles.GRAPHEME_BREAK_TEST),
Files.lines(Paths.get(System.getProperty("test.src", "."), "GraphemeTestCases.txt")))
.forEach( ln -> {
lineNumber[0]++;
if (ln.length() == 0 || ln.startsWith("#")) {
return;
}
ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", "");
// System.out.println(str);
String[] strs = ln.split("\u00f7|\u00d7");
StringBuilder src = new StringBuilder();
ArrayList<String> graphemes = new ArrayList<>();
StringBuilder buf = new StringBuilder();
int offBk = 0;
for (String str : strs) {
if (str.length() == 0) // first empty str
continue;
int cp = Integer.parseInt(str, 16);
src.appendCodePoint(cp);
buf.appendCodePoint(cp);
offBk += (str.length() + 1);
if (ln.charAt(offBk) == '\u00f7') { // DIV
graphemes.add(buf.toString());
buf = new StringBuilder();
}
}
Pattern p = Pattern.compile("\\X");
// (1) test \X directly
Matcher m = p.matcher(src.toString());
for (String g : graphemes) {
// System.out.printf(" grapheme:=[%s]%n", g);
String group = null;
if (!m.find() || !(group = m.group()).equals(g)) {
fail("Failed pattern \\X [" + ln + "] : "
+ "expected: " + g + " - actual: " + group
+ "(line " + lineNumber[0] + ")");
}
}
assertFalse(m.find());
// test \b{g} without \X via Pattern
Pattern pbg = Pattern.compile("\\b{g}");
m = pbg.matcher(src.toString());
m.find();
int prev = m.end();
for (String g : graphemes) {
String group = null;
if (!m.find() || !(group = src.substring(prev, m.end())).equals(g)) {
fail("Failed pattern \\b{g} [" + ln + "] : "
+ "expected: " + g + " - actual: " + group
+ "(line " + lineNumber[0] + ")");
}
assertEquals("", m.group());
prev = m.end();
}
assertFalse(m.find());
// (2) test \b{g} + \X via Scanner
Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}");
for (String g : graphemes) {
String next = null;
if (!s.hasNext(p) || !(next = s.next(p)).equals(g)) {
fail("Failed \\b{g} [" + ln + "] : "
+ "expected: " + g + " - actual: " + next
+ " (line " + lineNumber[0] + ")");
}
}
assertFalse(s.hasNext(p));
// test \b{g} without \X via Scanner
s = new Scanner(src.toString()).useDelimiter("\\b{g}");
for (String g : graphemes) {
String next = null;
if (!s.hasNext() || !(next = s.next()).equals(g)) {
fail("Failed \\b{g} [" + ln + "] : "
+ "expected: " + g + " - actual: " + next
+ " (line " + lineNumber[0] + ")");
}
}
assertFalse(s.hasNext());
});
.filter(line -> !line.isEmpty() && !line.startsWith("#"))
.toArray(String[]::new);
}
/**
 * Checks a single grapheme-break test case against {@code \X} and {@code \b{g}}.
 *
 * <p>Once whitespace, annotations and the trailing {@code #} comment are
 * stripped, the line is a run of hexadecimal code points separated by
 * U+00F7 or U+00D7 markers. A code point followed by U+00F7 ends the current
 * expected grapheme cluster; one followed by U+00D7 is joined with the next.
 * The expected clusters are then compared with the results of:
 * <ol>
 *   <li>{@code \X} through a {@code Matcher};</li>
 *   <li>{@code \b{g}} through a {@code Matcher};</li>
 *   <li>a {@code Scanner} delimited by {@code \b{g}}, reading with {@code \X};</li>
 *   <li>a {@code Scanner} delimited by {@code \b{g}}, reading plain tokens.</li>
 * </ol>
 *
 * @param line one raw test-case line supplied by the {@code graphemeTestCases}
 *             data provider
 * @throws Exception declared by the signature; {@code Integer.parseInt} raises
 *                   {@code NumberFormatException} for a token that is not
 *                   hexadecimal
 */
@Test(dataProvider = "graphemeTestCases")
public static void grapheme(String line) throws Exception {
    // NOTE(review): as written, the bracket alternative matches '[', one
    // letter, then two or more ']' -- confirm that is intended. The "#.*"
    // alternative already removes everything from the first '#' onwards.
    String tc = line.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+]|#.*", "");
    String[] strs = tc.split("\u00f7|\u00d7");

    StringBuilder src = new StringBuilder();
    List<String> graphemes = new ArrayList<>();
    StringBuilder buf = new StringBuilder();
    // Index in tc of the marker that follows the token being processed.
    // Assumes tc begins with a marker, so the first split element is empty.
    int offBk = 0;
    for (String str : strs) {
        if (str.isEmpty()) { // first empty str
            continue;
        }
        int cp = Integer.parseInt(str, 16);
        src.appendCodePoint(cp);
        buf.appendCodePoint(cp);
        offBk += (str.length() + 1);
        if (tc.charAt(offBk) == '\u00f7') { // DIV
            graphemes.add(buf.toString());
            buf = new StringBuilder();
        }
    }
    // Build the input once; every check below runs against the same string.
    String input = src.toString();

    Pattern p = Pattern.compile("\\X");

    // (1) test \X directly
    Matcher m = p.matcher(input);
    for (String g : graphemes) {
        String group = null;
        if (!m.find() || !(group = m.group()).equals(g)) {
            fail("Failed pattern \\X [" + tc + "] : "
                    + "expected: " + g + " - actual: " + group);
        }
    }
    assertFalse(m.find());

    // test \b{g} without \X via Pattern
    Pattern pbg = Pattern.compile("\\b{g}");
    m = pbg.matcher(input);
    // The first boundary primes 'prev'. Fail with the test case in the message
    // rather than letting m.end() throw IllegalStateException on a failed find.
    if (!m.find()) {
        fail("Failed pattern \\b{g} [" + tc + "] : "
                + "no boundary found in input");
    }
    int prev = m.end();
    for (String g : graphemes) {
        String group = null;
        if (!m.find() || !(group = input.substring(prev, m.end())).equals(g)) {
            fail("Failed pattern \\b{g} [" + tc + "] : "
                    + "expected: " + g + " - actual: " + group);
        }
        assertEquals("", m.group());
        prev = m.end();
    }
    assertFalse(m.find());

    // (2) test \b{g} + \X via Scanner
    try (Scanner s = new Scanner(input).useDelimiter("\\b{g}")) {
        for (String g : graphemes) {
            String next = null;
            if (!s.hasNext(p) || !(next = s.next(p)).equals(g)) {
                fail("Failed \\b{g} [" + tc + "] : "
                        + "expected: " + g + " - actual: " + next);
            }
        }
        assertFalse(s.hasNext(p));
    }

    // test \b{g} without \X via Scanner
    try (Scanner s = new Scanner(input).useDelimiter("\\b{g}")) {
        for (String g : graphemes) {
            String next = null;
            if (!s.hasNext() || !(next = s.next()).equals(g)) {
                fail("Failed \\b{g} [" + tc + "] : "
                        + "expected: " + g + " - actual: " + next);
            }
        }
        assertFalse(s.hasNext());
    }
}
@Test
public static void graphemeSanity() {
// some sanity checks
assertTrue(Pattern.compile("\\X{10}").matcher("abcdefghij").matches() &&
Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() &&