From c25f35205ae4544970bbaca233de8745f8e4e92c Mon Sep 17 00:00:00 2001
From: Daniel Hu <costmuch@amazon.com>
Date: Mon, 27 Oct 2025 16:48:51 +0000
Subject: [PATCH] 8341735: Rewrite the build/AbsPathsInImage.java test to not
 load the entire file at once

Reviewed-by: erikj
---
 test/jdk/build/AbsPathsInImage.java | 203 ++++++++++++++++++++--------
 1 file changed, 145 insertions(+), 58 deletions(-)

diff --git a/test/jdk/build/AbsPathsInImage.java b/test/jdk/build/AbsPathsInImage.java
index 1aa7e59941e..7b2c60c3dda 100644
--- a/test/jdk/build/AbsPathsInImage.java
+++ b/test/jdk/build/AbsPathsInImage.java
@@ -21,6 +21,7 @@
  * questions.
  */
 
+import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.file.FileVisitResult;
@@ -35,6 +36,8 @@ import java.util.Properties;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipInputStream;
 
+import static java.util.Comparator.comparing;
+
 /*
  * @test
  * @bug 8226346
@@ -42,7 +45,7 @@ import java.util.zip.ZipInputStream;
  * @requires !vm.debug
  * @comment ASAN keeps the 'unwanted' paths in the binaries because of its build options
  * @requires !vm.asan
- * @run main/othervm -Xmx900m AbsPathsInImage
+ * @run main AbsPathsInImage
  */
 public class AbsPathsInImage {
 
@@ -51,9 +54,14 @@ public class AbsPathsInImage {
     public static final String DIR_PROPERTY = "jdk.test.build.AbsPathsInImage.dir";
     private static final boolean IS_WINDOWS = System.getProperty("os.name").toLowerCase().contains("windows");
     private static final boolean IS_LINUX   = System.getProperty("os.name").toLowerCase().contains("linux");
+    private static final int DEFAULT_BUFFER_SIZE = 8192; // bytes per read in scanBytes and printDebugOutput
+    private static List<byte[]> searchPatterns = new ArrayList<>(); // byte patterns to find (presumably filled by expandPatterns - verify)
+    private static List<int[]> prefixTables = new ArrayList<>(); // KMP prefix tables; entry i belongs to searchPatterns.get(i)
 
     private boolean matchFound = false;
 
+    record Match(int begin, int end) { } // inclusive byte offsets of one pattern match in the scanned stream
+
     public static void main(String[] args) throws Exception {
         String jdkPathString = System.getProperty("test.jdk");
         Path jdkHome = Paths.get(jdkPathString);
@@ -107,9 +115,9 @@ public class AbsPathsInImage {
             throw new Error("Output root is not an absolute path: " + buildOutputRoot);
         }
 
-        List<byte[]> searchPatterns = new ArrayList<>();
-        expandPatterns(searchPatterns, buildWorkspaceRoot);
-        expandPatterns(searchPatterns, buildOutputRoot);
+        expandPatterns(buildWorkspaceRoot);
+        expandPatterns(buildOutputRoot);
+        createPrefixTables();
 
         System.out.println("Looking for:");
         for (byte[] searchPattern : searchPatterns) {
@@ -118,7 +126,7 @@ public class AbsPathsInImage {
         System.out.println();
 
         AbsPathsInImage absPathsInImage = new AbsPathsInImage();
-        absPathsInImage.scanFiles(dirToScan, searchPatterns);
+        absPathsInImage.scanFiles(dirToScan);
 
         if (absPathsInImage.matchFound) {
             throw new Exception("Test failed");
@@ -129,7 +137,7 @@ public class AbsPathsInImage {
      * Add path pattern to list of patterns to search for. Create all possible
      * variants depending on platform.
      */
-    private static void expandPatterns(List<byte[]> searchPatterns, String pattern) {
+    private static void expandPatterns(String pattern) {
         if (IS_WINDOWS) {
             String forward = pattern.replace('\\', '/');
             String back = pattern.replace('/', '\\');
@@ -151,7 +159,42 @@ public class AbsPathsInImage {
         }
     }
 
-    private void scanFiles(Path root, List<byte[]> searchPatterns) throws IOException {
+    /**
+     * KMP failure function. With {@code state} pattern bytes already matched and
+     * {@code match} as the next input byte, returns the pattern index to continue
+     * from. Called by createPrefixTables (pre-processing) and scanBytes (matching).
+     */
+    private static int getPrefixIndex(int patternIdx, int state, byte match) {
+        if (state == 0) {
+            return 0;
+        }
+        byte[] pattern = searchPatterns.get(patternIdx);
+        int[] table = prefixTables.get(patternIdx);
+        int candidate = table[state - 1];
+        while (candidate > 0 && pattern[candidate] != match) {
+            candidate = table[candidate - 1];
+        }
+        return candidate + (pattern[candidate] == match ? 1 : 0);
+    }
+
+    /**
+     * Builds one Knuth-Morris-Pratt (KMP) prefix table per search pattern and appends
+     * it to prefixTables. Each table is registered before it is filled because
+     * getPrefixIndex reads it back through prefixTables; entry 0 is left at 0.
+     * The finished tables drive the failure transitions taken in scanBytes.
+     */
+    private static void createPrefixTables() {
+        for (int patternIdx = 0; patternIdx < searchPatterns.size(); patternIdx++) {
+            byte[] pattern = searchPatterns.get(patternIdx);
+            int[] table = new int[pattern.length];
+            prefixTables.add(table);
+            for (int state = 1; state < pattern.length; state++) {
+                table[state] = getPrefixIndex(patternIdx, state, pattern[state]);
+            }
+        }
+    }
+
+    private void scanFiles(Path root) throws IOException {
         Files.walkFileTree(root, new SimpleFileVisitor<>() {
             @Override
             public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException {
@@ -170,84 +213,128 @@ public class AbsPathsInImage {
                 } else if ((fileName.endsWith(".debuginfo") && !IS_LINUX) || fileName.endsWith(".pdb")) {
                     // Do nothing
                 } else if (fileName.endsWith(".zip")) {
-                    scanZipFile(file, searchPatterns);
+                    scanZipFile(file);
                 } else {
-                    scanFile(file, searchPatterns);
+                    scanFile(file);
                 }
                 return super.visitFile(file, attrs);
             }
         });
     }
 
-    private void scanFile(Path file, List<byte[]> searchPatterns) throws IOException {
-        List<String> matches = scanBytes(Files.readAllBytes(file), searchPatterns);
-        if (matches.size() > 0) {
-            matchFound = true;
-            System.out.println(file + ":");
-            for (String match : matches) {
-                System.out.println(match);
-            }
-            System.out.println();
+    private void scanFile(Path file) throws IOException {
+        // First pass: stream the file through the matcher.
+        List<Match> found;
+        try (InputStream firstPass = Files.newInputStream(file)) {
+            found = scanBytes(firstPass);
+        }
+        if (!found.isEmpty()) {
+            // Second pass, failing files only: re-read the file so the text
+            // surrounding each match can be printed as debug output.
+            try (InputStream secondPass = Files.newInputStream(file)) {
+                printDebugOutput(secondPass, found, file + ":");
+            }
         }
     }
 
-    private void scanZipFile(Path zipFile, List<byte[]> searchPatterns) throws IOException {
-        try (ZipInputStream zipInputStream = new ZipInputStream(Files.newInputStream(zipFile))) {
-            ZipEntry zipEntry;
-            while ((zipEntry = zipInputStream.getNextEntry()) != null) {
-                List<String> matches = scanBytes(zipInputStream.readAllBytes(), searchPatterns);
-                if (matches.size() > 0) {
-                    matchFound = true;
-                    System.out.println(zipFile + ", " + zipEntry.getName() + ":");
-                    for (String match : matches) {
-                        System.out.println(match);
-                    }
-                    System.out.println();
+    /**
+     * Scans every entry of a zip file. The first pass records the matches of each
+     * entry in stream order (null for an entry without any); only if some entry
+     * matched is the zip read a second time to print debug output for those entries.
+     */
+    private void scanZipFile(Path zipFile) throws IOException {
+        // Pass 1: one slot per entry, so pass 2 can look results up by position.
+        List<List<Match>> matchesPerEntry = new ArrayList<>();
+        boolean anyMatch = false;
+        try (ZipInputStream zis = new ZipInputStream(Files.newInputStream(zipFile))) {
+            while (zis.getNextEntry() != null) {
+                List<Match> hits = scanBytes(zis);
+                boolean hasHits = !hits.isEmpty();
+                matchesPerEntry.add(hasHits ? hits : null);
+                anyMatch |= hasHits;
+            }
+        }
+        if (!anyMatch) {
+            return;
+        }
+        // Pass 2: walk the entries again in the same order and print the failing ones.
+        try (ZipInputStream zis = new ZipInputStream(Files.newInputStream(zipFile))) {
+            ZipEntry entry;
+            for (int entryIdx = 0; (entry = zis.getNextEntry()) != null; entryIdx++) {
+                List<Match> hits = matchesPerEntry.get(entryIdx);
+                if (hits != null) {
+                    printDebugOutput(zis, hits, zipFile + ", " + entry.getName() + ":");
                 }
             }
         }
     }
 
-    private List<String> scanBytes(byte[] data, List<byte[]> searchPatterns) {
-        List<String> matches = new ArrayList<>();
-        for (int i = 0; i < data.length; i++) {
-            for (byte[] searchPattern : searchPatterns) {
-                boolean found = true;
-                for (int j = 0; j < searchPattern.length; j++) {
-                    if ((i + j >= data.length || data[i + j] != searchPattern[j])) {
-                        found = false;
+    /**
+     * Streams {@code input} through one KMP matcher per entry of searchPatterns.
+     * states[i] is the number of leading bytes of pattern i currently matched: it is
+     * incremented when the next byte matches and otherwise replaced by the failure
+     * function value from getPrefixIndex. Every pattern consumes every byte.
+     * Returns the inclusive begin/end stream offsets of all matches found.
+     */
+    private List<Match> scanBytes(InputStream input) throws IOException {
+        List<Match> matches = new ArrayList<>();
+        byte[] buf = new byte[DEFAULT_BUFFER_SIZE];
+        int[] states = new int[searchPatterns.size()];
+        int fileIdx = 0;
+        int bytesRead;
+        while ((bytesRead = input.read(buf)) != -1) {
+            for (int bufIdx = 0; bufIdx < bytesRead; bufIdx++, fileIdx++) {
+                byte datum = buf[bufIdx];
+                for (int i = 0; i < states.length; i++) {
+                    byte[] pattern = searchPatterns.get(i);
+                    if (datum != pattern[states[i]]) {
+                        states[i] = getPrefixIndex(i, states[i], datum);
+                    } else if (++states[i] == pattern.length) {
+                        // Restart from 0 so a pattern never overlaps itself. No break: the
+                        // other patterns must also consume this byte or their state goes stale.
+                        states[i] = 0;
+                        matches.add(new Match(fileIdx - pattern.length + 1, fileIdx));
-                        break;
                     }
                 }
-                if (found) {
-                    matches.add(new String(data, charsStart(data, i), charsOffset(data, i, searchPattern.length)));
-                    // No need to search the same string for multiple patterns
-                    break;
-                }
             }
         }
         return matches;
     }
 
-    private int charsStart(byte[] data, int startIndex) {
-        int index = startIndex;
-        while (--index > 0) {
-            byte datum = data[index];
-            if (datum < 32 || datum > 126) {
-                break;
+    /**
+     * Second pass over input known to contain matches: prints HEADER, then for each
+     * match the surrounding run of printable ASCII bytes (32..126), which the streaming
+     * scanBytes cannot backtrack for. A match still pending at end of input is flushed.
+     */
+    private void printDebugOutput(InputStream input, List<Match> matches, final String HEADER) throws IOException {
+        matchFound = true;
+        System.out.println(HEADER);
+        matches.sort(comparing(Match::begin));
+        ByteArrayOutputStream output = new ByteArrayOutputStream();
+        byte[] buf = new byte[DEFAULT_BUFFER_SIZE];
+        int matchIdx = 0;
+        int fileIdx = 0;
+        int bytesRead;
+        while (matchIdx < matches.size() && (bytesRead = input.read(buf)) != -1) {
+            for (int i = 0; matchIdx < matches.size() && i < bytesRead; i++, fileIdx++) {
+                byte datum = buf[i];
+                if (datum >= 32 && datum <= 126) {
+                    output.write(datum);
+                } else if (fileIdx < matches.get(matchIdx).begin()) {
+                    output.reset();
+                } else if (fileIdx > matches.get(matchIdx).end()) {
+                    System.out.println(output.toString());
+                    output.reset();
+                    // Approximate if a pattern contains non-printable bytes; the output stays legible.
+                    for (; matchIdx < matches.size() && matches.get(matchIdx).end() < fileIdx; matchIdx++);
+                } else {
+                    output.write(datum);
+                }
             }
         }
-        return index + 1;
-    }
-
-    private int charsOffset(byte[] data, int startIndex, int startOffset) {
-        int offset = startOffset;
-        while (startIndex + ++offset < data.length) {
-            byte datum = data[startIndex + offset];
-            if (datum < 32 || datum > 126) {
-                break;
-            }
-        }
-        return offset;
+        if (matchIdx < matches.size() && fileIdx > matches.get(matchIdx).end()) {
+            System.out.println(output.toString());
+        }
+        System.out.println();
     }
 }