8341735: Rewrite the build/AbsPathsInImage.java test to not load the entire file at once

Reviewed-by: erikj
This commit is contained in:
Daniel Hu 2025-10-27 16:48:51 +00:00 committed by Cesar Soares Lucas
parent ebf9c5bfc1
commit c25f35205a

View File

@ -21,6 +21,7 @@
* questions.
*/
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.FileVisitResult;
@ -35,6 +36,8 @@ import java.util.Properties;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import static java.util.Comparator.comparing;
/*
* @test
* @bug 8226346
@ -42,7 +45,7 @@ import java.util.zip.ZipInputStream;
* @requires !vm.debug
* @comment ASAN keeps the 'unwanted' paths in the binaries because of its build options
* @requires !vm.asan
* @run main/othervm -Xmx900m AbsPathsInImage
* @run main AbsPathsInImage
*/
public class AbsPathsInImage {
@ -51,9 +54,14 @@ public class AbsPathsInImage {
// Name of a system property. NOTE(review): presumably names the directory to scan;
// the code that reads this property is outside the visible lines -- confirm.
public static final String DIR_PROPERTY = "jdk.test.build.AbsPathsInImage.dir";
// True when the lower-cased "os.name" system property contains "windows".
private static final boolean IS_WINDOWS = System.getProperty("os.name").toLowerCase().contains("windows");
// True when the lower-cased "os.name" system property contains "linux".
private static final boolean IS_LINUX = System.getProperty("os.name").toLowerCase().contains("linux");
// Size, in bytes, of the read buffer allocated by scanBytes and printDebugOutput.
private static final int DEFAULT_BUFFER_SIZE = 8192;
// Byte patterns to look for. Entry i pairs with prefixTables entry i.
private static List<byte[]> searchPatterns = new ArrayList<>();
// KMP prefix tables, one per search pattern; filled by createPrefixTables.
private static List<int[]> prefixTables = new ArrayList<>();
// Set to true by printDebugOutput; main throws when it is set after scanning.
private boolean matchFound = false;
// Inclusive [begin, end] byte offsets of one pattern occurrence within a scanned stream.
record Match(int begin, int end) { }
public static void main(String[] args) throws Exception {
String jdkPathString = System.getProperty("test.jdk");
Path jdkHome = Paths.get(jdkPathString);
@ -107,9 +115,9 @@ public class AbsPathsInImage {
throw new Error("Output root is not an absolute path: " + buildOutputRoot);
}
List<byte[]> searchPatterns = new ArrayList<>();
expandPatterns(searchPatterns, buildWorkspaceRoot);
expandPatterns(searchPatterns, buildOutputRoot);
expandPatterns(buildWorkspaceRoot);
expandPatterns(buildOutputRoot);
createPrefixTables();
System.out.println("Looking for:");
for (byte[] searchPattern : searchPatterns) {
@ -118,7 +126,7 @@ public class AbsPathsInImage {
System.out.println();
AbsPathsInImage absPathsInImage = new AbsPathsInImage();
absPathsInImage.scanFiles(dirToScan, searchPatterns);
absPathsInImage.scanFiles(dirToScan);
if (absPathsInImage.matchFound) {
throw new Exception("Test failed");
@ -129,7 +137,7 @@ public class AbsPathsInImage {
* Add path pattern to list of patterns to search for. Create all possible
* variants depending on platform.
*/
private static void expandPatterns(List<byte[]> searchPatterns, String pattern) {
private static void expandPatterns(String pattern) {
if (IS_WINDOWS) {
String forward = pattern.replace('\\', '/');
String back = pattern.replace('/', '\\');
@ -151,7 +159,42 @@ public class AbsPathsInImage {
}
}
private void scanFiles(Path root, List<byte[]> searchPatterns) throws IOException {
/**
 * KMP failure function. Given that {@code state} bytes of the pattern at
 * {@code patternIdx} are currently matched and the next byte is {@code match},
 * falls back through that pattern's prefix table and returns the number of
 * pattern bytes that are matched once {@code match} has been consumed.
 * Called from createPrefixTables (pre-processing) and scanBytes (matching).
 *
 * @param patternIdx index of the pattern in searchPatterns / prefixTables
 * @param state      number of pattern bytes matched so far
 * @param match      the byte that has to be matched next
 * @return the new match length; always 0 when {@code state} is 0
 */
private static int getPrefixIndex(int patternIdx, int state, byte match) {
    if (state != 0) {
        byte[] pattern = searchPatterns.get(patternIdx);
        int[] fallback = prefixTables.get(patternIdx);
        int candidate = fallback[state - 1];
        while (true) {
            if (pattern[candidate] == match) {
                // The prefix of length 'candidate' can be extended by this byte.
                return candidate + 1;
            }
            if (candidate == 0) {
                // No shorter prefix left to try.
                return 0;
            }
            candidate = fallback[candidate - 1];
        }
    }
    return 0;
}
/**
 * Pre-processing step of the Knuth-Morris-Pratt (KMP) search. For every
 * entry of searchPatterns, appends one table to prefixTables; slot i of a
 * table holds the value getPrefixIndex computes for state i and pattern
 * byte i, and slot 0 is left at 0. scanBytes later consults these tables
 * (through getPrefixIndex) for its failure transitions.
 */
private static void createPrefixTables() {
    int patternIdx = 0;
    for (byte[] pattern : searchPatterns) {
        int[] table = new int[pattern.length];
        // Register the table before filling it: getPrefixIndex looks it up
        // in prefixTables while the remaining slots are being computed.
        prefixTables.add(table);
        for (int pos = 1; pos < table.length; pos++) {
            table[pos] = getPrefixIndex(patternIdx, pos, pattern[pos]);
        }
        patternIdx++;
    }
}
private void scanFiles(Path root) throws IOException {
Files.walkFileTree(root, new SimpleFileVisitor<>() {
@Override
public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException {
@ -170,84 +213,128 @@ public class AbsPathsInImage {
} else if ((fileName.endsWith(".debuginfo") && !IS_LINUX) || fileName.endsWith(".pdb")) {
// Do nothing
} else if (fileName.endsWith(".zip")) {
scanZipFile(file, searchPatterns);
scanZipFile(file);
} else {
scanFile(file, searchPatterns);
scanFile(file);
}
return super.visitFile(file, attrs);
}
});
}
private void scanFile(Path file, List<byte[]> searchPatterns) throws IOException {
List<String> matches = scanBytes(Files.readAllBytes(file), searchPatterns);
if (matches.size() > 0) {
matchFound = true;
System.out.println(file + ":");
for (String match : matches) {
System.out.println(match);
}
System.out.println();
/**
* Streams a single file through scanBytes. Only when at least one match was
* recorded is the file opened a second time and handed to printDebugOutput,
* together with the header (the file path followed by ":"); printDebugOutput
* also sets matchFound.
*
* @param file the file to scan
* @throws IOException if opening or reading the file fails
*/
private void scanFile(Path file) throws IOException {
List<Match> matches;
try (InputStream inputStream = Files.newInputStream(file)) {
matches = scanBytes(inputStream);
}
// No pattern occurred in this file: nothing to report.
if (matches.size() == 0) {
return;
}
// At least one pattern occurred: accept the cost of reading the file a
// second time so the text surrounding each match can be printed.
try (InputStream inputStream = Files.newInputStream(file)) {
printDebugOutput(inputStream, matches, file + ":");
}
}
private void scanZipFile(Path zipFile, List<byte[]> searchPatterns) throws IOException {
/**
* Scans every entry of a zip file in two passes. The first pass runs
* scanBytes on each entry and remembers the result per entry. If no entry
* produced a match the method returns; otherwise the zip file is opened
* again and printDebugOutput is called for each entry that had matches,
* with the header "zip path, entry name:".
*
* @param zipFile the zip file to scan
* @throws IOException if opening or reading the zip file fails
*/
private void scanZipFile(Path zipFile) throws IOException {
List<List<Match>> entryMatches = new ArrayList<>();
boolean found = false;
ZipEntry zipEntry;
try (ZipInputStream zipInputStream = new ZipInputStream(Files.newInputStream(zipFile))) {
ZipEntry zipEntry;
while ((zipEntry = zipInputStream.getNextEntry()) != null) {
List<String> matches = scanBytes(zipInputStream.readAllBytes(), searchPatterns);
List<Match> matches = scanBytes(zipInputStream);
if (matches.size() > 0) {
matchFound = true;
System.out.println(zipFile + ", " + zipEntry.getName() + ":");
for (String match : matches) {
System.out.println(match);
}
System.out.println();
entryMatches.add(matches);
found = true;
} else {
// Placeholder so that list index i still corresponds to the i-th entry
// when the entries are walked again in the second pass.
entryMatches.add(null);
}
}
}
// No entry contained a pattern: nothing to report.
if (!found) {
return;
}
// At least one entry contained a pattern: walk the entries a second time,
// in the same order, and print debug output for those that had matches.
try (ZipInputStream zipInputStream = new ZipInputStream(Files.newInputStream(zipFile))) {
int i = 0;
while ((zipEntry = zipInputStream.getNextEntry()) != null) {
List<Match> matches = entryMatches.get(i);
i++;
if (matches != null) {
printDebugOutput(zipInputStream, matches, zipFile + ", " + zipEntry.getName() + ":");
}
}
}
}
private List<String> scanBytes(byte[] data, List<byte[]> searchPatterns) {
List<String> matches = new ArrayList<>();
for (int i = 0; i < data.length; i++) {
for (byte[] searchPattern : searchPatterns) {
boolean found = true;
for (int j = 0; j < searchPattern.length; j++) {
if ((i + j >= data.length || data[i + j] != searchPattern[j])) {
found = false;
/**
* Scans each byte until encounters a match with one of searchPatterns. Uses KMP to
* perform matches. Keep track of current matched index (states) for each search
* pattern. At each given byte, update states accordingly (increment if match or
* failure function transition if mismatch). Returns a list of Match objects.
*/
private List<Match> scanBytes(InputStream input) throws IOException {
List<Match> matches = new ArrayList<>();
byte[] buf = new byte[DEFAULT_BUFFER_SIZE];
int[] states = new int[searchPatterns.size()];
int fileIdx = 0;
int bytesRead, patternLen;
while ((bytesRead = input.read(buf)) != -1) {
for (int bufIdx = 0; bufIdx < bytesRead; bufIdx++, fileIdx++) {
byte datum = buf[bufIdx];
for (int i = 0; i < searchPatterns.size(); i++) {
patternLen = searchPatterns.get(i).length;
if (datum != searchPatterns.get(i)[states[i]]) {
states[i] = getPrefixIndex(i, states[i], datum);
} else if (++states[i] == patternLen) {
// technically at last match, state should reset according to failure function
// but in original test, matching didn't search same string for multiple matches
states[i] = 0;
matches.add(new Match(fileIdx - patternLen + 1, fileIdx));
break;
}
}
if (found) {
matches.add(new String(data, charsStart(data, i), charsOffset(data, i, searchPattern.length)));
// No need to search the same string for multiple patterns
break;
}
}
}
return matches;
}
private int charsStart(byte[] data, int startIndex) {
int index = startIndex;
while (--index > 0) {
byte datum = data[index];
if (datum < 32 || datum > 126) {
break;
/**
* In original test, failed test output would backtrack to last non-ascii byte on
* matched pattern. This is incompatible with the new buffered approach (and a
* proper solution requires a 2nd dynamic buffer). Instead, on failed test case,
* files are scanned a 2nd time to print debug output. Failed runs will pay
* additional performance/space penalty, but passing runs are faster.
*/
private void printDebugOutput(InputStream input, List<Match> matches, final String HEADER) throws IOException{
matchFound = true;
System.out.println(HEADER);
matches.sort(comparing(Match::begin));
ByteArrayOutputStream output = new ByteArrayOutputStream();
byte[] buf = new byte[DEFAULT_BUFFER_SIZE];
int matchIdx = 0;
int fileIdx = 0;
int bytesRead;
while (matchIdx < matches.size() && (bytesRead = input.read(buf)) != -1) {
for (int i = 0; matchIdx < matches.size() && i < bytesRead; i++, fileIdx++) {
byte datum = buf[i];
if (datum >= 32 && datum <= 126) {
output.write(datum);
} else if (fileIdx < matches.get(matchIdx).begin()) {
output.reset();
} else if (fileIdx > matches.get(matchIdx).end()) {
System.out.println(output.toString());
output.reset();
// This imperfect as incorrect in edge cases with patterns containing non-ascii?
// but high-accuracy not priority + output still legible and useful
for (; matchIdx < matches.size() && matches.get(matchIdx).end() < fileIdx; matchIdx++);
} else {
output.write(datum);
}
}
}
return index + 1;
}
private int charsOffset(byte[] data, int startIndex, int startOffset) {
int offset = startOffset;
while (startIndex + ++offset < data.length) {
byte datum = data[startIndex + offset];
if (datum < 32 || datum > 126) {
break;
}
}
return offset;
System.out.println();
}
}