From c25f35205ae4544970bbaca233de8745f8e4e92c Mon Sep 17 00:00:00 2001 From: Daniel Hu Date: Mon, 27 Oct 2025 16:48:51 +0000 Subject: [PATCH] 8341735: Rewrite the build/AbsPathsInImage.java test to not load the entire file at once Reviewed-by: erikj --- test/jdk/build/AbsPathsInImage.java | 203 ++++++++++++++++++++-------- 1 file changed, 145 insertions(+), 58 deletions(-) diff --git a/test/jdk/build/AbsPathsInImage.java b/test/jdk/build/AbsPathsInImage.java index 1aa7e59941e..7b2c60c3dda 100644 --- a/test/jdk/build/AbsPathsInImage.java +++ b/test/jdk/build/AbsPathsInImage.java @@ -21,6 +21,7 @@ * questions. */ +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.nio.file.FileVisitResult; @@ -35,6 +36,8 @@ import java.util.Properties; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; +import static java.util.Comparator.comparing; + /* * @test * @bug 8226346 @@ -42,7 +45,7 @@ import java.util.zip.ZipInputStream; * @requires !vm.debug * @comment ASAN keeps the 'unwanted' paths in the binaries because of its build options * @requires !vm.asan - * @run main/othervm -Xmx900m AbsPathsInImage + * @run main AbsPathsInImage */ public class AbsPathsInImage { @@ -51,9 +54,14 @@ public class AbsPathsInImage { public static final String DIR_PROPERTY = "jdk.test.build.AbsPathsInImage.dir"; private static final boolean IS_WINDOWS = System.getProperty("os.name").toLowerCase().contains("windows"); private static final boolean IS_LINUX = System.getProperty("os.name").toLowerCase().contains("linux"); + private static final int DEFAULT_BUFFER_SIZE = 8192; + private static List searchPatterns = new ArrayList<>(); + private static List prefixTables = new ArrayList<>(); private boolean matchFound = false; + record Match(int begin, int end) { } + public static void main(String[] args) throws Exception { String jdkPathString = System.getProperty("test.jdk"); Path jdkHome = Paths.get(jdkPathString); @@ -107,9 +115,9 @@ public class AbsPathsInImage { throw new Error("Output root is not an absolute path: " + buildOutputRoot); } - List searchPatterns = new ArrayList<>(); - expandPatterns(searchPatterns, buildWorkspaceRoot); - expandPatterns(searchPatterns, buildOutputRoot); + expandPatterns(buildWorkspaceRoot); + expandPatterns(buildOutputRoot); + createPrefixTables(); System.out.println("Looking for:"); for (byte[] searchPattern : searchPatterns) { @@ -118,7 +126,7 @@ public class AbsPathsInImage { System.out.println(); AbsPathsInImage absPathsInImage = new AbsPathsInImage(); - absPathsInImage.scanFiles(dirToScan, searchPatterns); + absPathsInImage.scanFiles(dirToScan); if (absPathsInImage.matchFound) { throw new Exception("Test failed"); @@ -129,7 +137,7 @@ public class AbsPathsInImage { * Add path pattern to list of patterns to search for. Create all possible * variants depending on platform. */ - private static void expandPatterns(List searchPatterns, String pattern) { + private static void expandPatterns(String pattern) { if (IS_WINDOWS) { String forward = pattern.replace('\\', '/'); String back = pattern.replace('/', '\\'); @@ -151,7 +159,42 @@ public class AbsPathsInImage { } } - private void scanFiles(Path root, List searchPatterns) throws IOException { + /** + * The failure function for KMP. Returns the correct index in the pattern to jump + * back to when encountering a mismatched character. Used in both + * createPrefixTables (pre-processing) and scanBytes (matching). + */ + private static int getPrefixIndex(int patternIdx, int state, byte match) { + if (state == 0) { + return 0; + } + byte[] searchPattern = searchPatterns.get(patternIdx); + int[] prefixTable = prefixTables.get(patternIdx); + int i = prefixTable[state - 1]; + while (i > 0 && searchPattern[i] != match) { + i = prefixTable[i - 1]; + } + return searchPattern[i] == match ? i + 1 : i; + } + + /** + * Pre-processing string patterns for Knuth–Morris–Pratt (KMP) search algorithm. + * Lookup tables of longest prefixes at each given index are created for each + * search pattern string. These tables are later used in scanBytes during matching + * as lookups for failure state transitions. + */ + private static void createPrefixTables() { + for (int patternIdx = 0; patternIdx < searchPatterns.size(); patternIdx++) { + int patternLen = searchPatterns.get(patternIdx).length; + int[] prefixTable = new int[patternLen]; + prefixTables.add(prefixTable); + for (int i = 1; i < patternLen; i++) { + prefixTable[i] = getPrefixIndex(patternIdx, i, searchPatterns.get(patternIdx)[i]); + } + } + } + + private void scanFiles(Path root) throws IOException { Files.walkFileTree(root, new SimpleFileVisitor<>() { @Override public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException { @@ -170,84 +213,128 @@ public class AbsPathsInImage { } else if ((fileName.endsWith(".debuginfo") && !IS_LINUX) || fileName.endsWith(".pdb")) { // Do nothing } else if (fileName.endsWith(".zip")) { - scanZipFile(file, searchPatterns); + scanZipFile(file); } else { - scanFile(file, searchPatterns); + scanFile(file); } return super.visitFile(file, attrs); } }); } - private void scanFile(Path file, List searchPatterns) throws IOException { - List matches = scanBytes(Files.readAllBytes(file), searchPatterns); - if (matches.size() > 0) { - matchFound = true; - System.out.println(file + ":"); - for (String match : matches) { - System.out.println(match); - } - System.out.println(); + private void scanFile(Path file) throws IOException { + List matches; + try (InputStream inputStream = Files.newInputStream(file)) { + matches = scanBytes(inputStream); + } + // test succeeds + if (matches.size() == 0) { + return; + } + // test fails; pay penalty and re-scan file for debug output + try (InputStream inputStream = Files.newInputStream(file)) { + printDebugOutput(inputStream, matches, file + ":"); } } - private void scanZipFile(Path zipFile, List searchPatterns) throws IOException { + private void scanZipFile(Path zipFile) throws IOException { + List> entryMatches = new ArrayList<>(); + boolean found = false; + ZipEntry zipEntry; try (ZipInputStream zipInputStream = new ZipInputStream(Files.newInputStream(zipFile))) { - ZipEntry zipEntry; while ((zipEntry = zipInputStream.getNextEntry()) != null) { - List matches = scanBytes(zipInputStream.readAllBytes(), searchPatterns); + List matches = scanBytes(zipInputStream); if (matches.size() > 0) { - matchFound = true; - System.out.println(zipFile + ", " + zipEntry.getName() + ":"); - for (String match : matches) { - System.out.println(match); - } - System.out.println(); + entryMatches.add(matches); + found = true; + } else { + entryMatches.add(null); + } + } + } + // test succeeds + if (!found) { + return; + } + // test fails + try (ZipInputStream zipInputStream = new ZipInputStream(Files.newInputStream(zipFile))) { + int i = 0; + while ((zipEntry = zipInputStream.getNextEntry()) != null) { + List matches = entryMatches.get(i); + i++; + if (matches != null) { + printDebugOutput(zipInputStream, matches, zipFile + ", " + zipEntry.getName() + ":"); } } } } - private List scanBytes(byte[] data, List searchPatterns) { - List matches = new ArrayList<>(); - for (int i = 0; i < data.length; i++) { - for (byte[] searchPattern : searchPatterns) { - boolean found = true; - for (int j = 0; j < searchPattern.length; j++) { - if ((i + j >= data.length || data[i + j] != searchPattern[j])) { - found = false; + /** + * Scans each byte until encounters a match with one of searchPatterns. Uses KMP to + * perform matches. Keep track of current matched index (states) for each search + * pattern. At each given byte, update states accordingly (increment if match or + * failure function transition if mismatch). Returns a list of Match objects. + */ + private List scanBytes(InputStream input) throws IOException { + List matches = new ArrayList<>(); + byte[] buf = new byte[DEFAULT_BUFFER_SIZE]; + int[] states = new int[searchPatterns.size()]; + int fileIdx = 0; + int bytesRead, patternLen; + while ((bytesRead = input.read(buf)) != -1) { + for (int bufIdx = 0; bufIdx < bytesRead; bufIdx++, fileIdx++) { + byte datum = buf[bufIdx]; + for (int i = 0; i < searchPatterns.size(); i++) { + patternLen = searchPatterns.get(i).length; + if (datum != searchPatterns.get(i)[states[i]]) { + states[i] = getPrefixIndex(i, states[i], datum); + } else if (++states[i] == patternLen) { + // technically at last match, state should reset according to failure function + // but in original test, matching didn't search same string for multiple matches + states[i] = 0; + matches.add(new Match(fileIdx - patternLen + 1, fileIdx)); break; } } - if (found) { - matches.add(new String(data, charsStart(data, i), charsOffset(data, i, searchPattern.length))); - // No need to search the same string for multiple patterns - break; - } } } return matches; } - private int charsStart(byte[] data, int startIndex) { - int index = startIndex; - while (--index > 0) { - byte datum = data[index]; - if (datum < 32 || datum > 126) { - break; + /** + * In original test, failed test output would backtrack to last non-ascii byte on + * matched pattern. This is incompatible with the new buffered approach (and a + * proper solution requires a 2nd dynamic buffer). Instead, on failed test case, + * files are scanned a 2nd time to print debug output. Failed runs will pay + * additional performance/space penalty, but passing runs are faster. + */ + private void printDebugOutput(InputStream input, List matches, final String HEADER) throws IOException{ + matchFound = true; + System.out.println(HEADER); + matches.sort(comparing(Match::begin)); + ByteArrayOutputStream output = new ByteArrayOutputStream(); + byte[] buf = new byte[DEFAULT_BUFFER_SIZE]; + int matchIdx = 0; + int fileIdx = 0; + int bytesRead; + while (matchIdx < matches.size() && (bytesRead = input.read(buf)) != -1) { + for (int i = 0; matchIdx < matches.size() && i < bytesRead; i++, fileIdx++) { + byte datum = buf[i]; + if (datum >= 32 && datum <= 126) { + output.write(datum); + } else if (fileIdx < matches.get(matchIdx).begin()) { + output.reset(); + } else if (fileIdx > matches.get(matchIdx).end()) { + System.out.println(output.toString()); + output.reset(); + // This imperfect as incorrect in edge cases with patterns containing non-ascii? + // but high-accuracy not priority + output still legible and useful + for (; matchIdx < matches.size() && matches.get(matchIdx).end() < fileIdx; matchIdx++); + } else { + output.write(datum); + } } } - return index + 1; - } - - private int charsOffset(byte[] data, int startIndex, int startOffset) { - int offset = startOffset; - while (startIndex + ++offset < data.length) { - byte datum = data[startIndex + offset]; - if (datum < 32 || datum > 126) { - break; - } - } - return offset; + System.out.println(); } }