From dddfcd03aa30514d63eceff707d48bff35e93c56 Mon Sep 17 00:00:00 2001 From: Kerem Kat Date: Wed, 5 Nov 2025 08:33:14 +0000 Subject: [PATCH] 8334866: Improve Speed of ElfDecoder source search Reviewed-by: shade, chagedorn --- src/hotspot/share/utilities/elfFile.cpp | 148 +++++++++++++++++++++++- src/hotspot/share/utilities/elfFile.hpp | 90 ++++++++++++-- 2 files changed, 225 insertions(+), 13 deletions(-) diff --git a/src/hotspot/share/utilities/elfFile.cpp b/src/hotspot/share/utilities/elfFile.cpp index e3cbb5ac18e..9ea19b38276 100644 --- a/src/hotspot/share/utilities/elfFile.cpp +++ b/src/hotspot/share/utilities/elfFile.cpp @@ -684,9 +684,8 @@ bool ElfFile::create_new_dwarf_file(const char* filepath) { // Starting point of reading line number and filename information from the DWARF file. bool DwarfFile::get_filename_and_line_number(const uint32_t offset_in_library, char* filename, const size_t filename_len, int* line, const bool is_pc_after_call) { - DebugAranges debug_aranges(this); uint32_t compilation_unit_offset = 0; // 4-bytes for 32-bit DWARF - if (!debug_aranges.find_compilation_unit_offset(offset_in_library, &compilation_unit_offset)) { + if (!_debug_aranges.find_compilation_unit_offset(offset_in_library, &compilation_unit_offset)) { DWARF_LOG_ERROR("Failed to find .debug_info offset for the compilation unit."); return false; } @@ -708,11 +707,87 @@ bool DwarfFile::get_filename_and_line_number(const uint32_t offset_in_library, c return true; } +// Build sorted cache of all address ranges for binary search. 
+// On success every non-empty address range of the section is stored in _cache together with the .debug_info offset
+// of the set it belongs to; later calls then return immediately without reading the file again.
+// Returns:
+//  - VALID:           _cache can be queried (it may be empty).
+//  - TRY_LINEAR_SCAN: a C heap allocation failed. Nothing is cached, the reader position is restored and the caller
+//                     should scan the section linearly. A later call tries to build the cache again.
+//  - FAILED:          the section header or an address descriptor could not be read. This result is remembered and
+//                     returned by all later calls.
+DwarfFile::DebugAranges::CacheHint DwarfFile::DebugAranges::ensure_cached() {
+  if (_cache._failed) {
+    return CacheHint::FAILED;
+  }
+  if (_cache._initialized) {
+    return CacheHint::VALID;
+  }
+
+  assert(_cache._capacity == 0, "need fresh cache");
+  assert(_cache._count == 0, "need fresh cache");
+  // Remember where the reader was so that the position can be restored before handing over to the linear scan.
+  const long pos = _reader.get_position();
+  if (!read_section_header()) {
+    _cache.destroy(true);
+    return CacheHint::FAILED;
+  }
+
+  // Start with reasonable initial capacity to minimize number of grow/realloc calls.
+  // Assume ~3% of the .debug_aranges is DebugArangesSetHeader and the rest is made up of AddressDescriptors.
+  const uintptr_t estimated_set_header_size = _size_bytes / 32;
+  const size_t initial_capacity = (_size_bytes - estimated_set_header_size) / sizeof(AddressDescriptor);
+  _cache._entries = NEW_C_HEAP_ARRAY_RETURN_NULL(ArangesEntry, initial_capacity, mtInternal);
+  if (_cache._entries == nullptr) {
+    // Running out of C heap does not make the section unreadable. Do not mark the cache as failed, otherwise all
+    // later lookups would return FAILED instead of falling back to the linear scan.
+    _cache.destroy(false);
+    _reader.set_position(pos);
+    return CacheHint::TRY_LINEAR_SCAN;
+  }
+  _cache._capacity = initial_capacity;
+  _cache._count = 0;
+
+  // Read all sets and their descriptors
+  while (_reader.has_bytes_left()) {
+    DebugArangesSetHeader set_header;
+    if (!read_set_header(set_header)) {
+      // Stop at the first set header that cannot be read; the entries collected so far are kept.
+      break;
+    }
+
+    // Read all address descriptors for this set into the cache.
+    AddressDescriptor descriptor;
+    do {
+      if (!read_address_descriptor(descriptor)) {
+        _cache.destroy(true);
+        return CacheHint::FAILED;
+      }
+      // Terminating entries and empty ranges cannot match any address and are not cached.
+      if (!is_terminating_entry(set_header, descriptor) && descriptor.range_length > 0 &&
+          !_cache.add_entry(descriptor, set_header._debug_info_offset)) {
+        // add_entry() only fails if the cache could not be grown, i.e. a C heap allocation failure. As above, keep
+        // the linear scan available for this and all later lookups.
+        _cache.destroy(false);
+        _reader.set_position(pos);
+        return CacheHint::TRY_LINEAR_SCAN;
+      }
+    } while (!is_terminating_entry(set_header, descriptor) && _reader.has_bytes_left());
+  }
+
+  if (_cache._count == 0) {
+    // No entries found, unusual but still valid. Release the unused array but remember that the section has been
+    // parsed so that it is not read again on every lookup.
+    _cache.destroy(false);
+    _cache._initialized = true;
+    return CacheHint::VALID;
+  }
+  _cache.sort();
+  _cache._initialized = true;
+  DWARF_LOG_INFO("Built .debug_aranges cache for '%s' with %zu entries", this->_dwarf_file->filepath(), _cache._count);
+  return CacheHint::VALID;
+}
+
 // (2) The .debug_aranges section contains a number of entries/sets. Each set contains one or multiple address range descriptors of the
 // form [beginning_address, beginning_address+length). Start reading these sets and their descriptors until we find one that contains
 // 'offset_in_library'. Read the debug_info_offset field from the header of this set which defines the offset for the compilation unit.
 // This process is described in section 6.1.2 of the DWARF 4 spec.
 bool DwarfFile::DebugAranges::find_compilation_unit_offset(const uint32_t offset_in_library, uint32_t* compilation_unit_offset) {
+  switch (ensure_cached()) {
+    case CacheHint::VALID:
+      return _cache.find_compilation_unit_offset(offset_in_library, compilation_unit_offset);
+    case CacheHint::TRY_LINEAR_SCAN:
+      break;
+    case CacheHint::FAILED:
+      return false;
+  }
+
+  // Fall back to linear scan if building of the cache failed, which can happen
+  // if there are C heap allocation errors.
+  DWARF_LOG_INFO("Falling back to linear scan of .debug_aranges for '%s'", _dwarf_file->filepath());
   if (!read_section_header()) {
     DWARF_LOG_ERROR("Failed to read a .debug_aranges header.");
     return false;
@@ -750,6 +825,7 @@ bool DwarfFile::DebugAranges::read_section_header() {
   }
 
   _section_start_address = shdr.sh_offset;
+  _size_bytes = shdr.sh_size;
   _reader.set_max_pos(shdr.sh_offset + shdr.sh_size);
   return _reader.set_position(shdr.sh_offset);
 }
@@ -829,6 +905,74 @@ bool DwarfFile::DebugAranges::is_terminating_entry(const DwarfFile::DebugAranges
   return is_terminating;
 }
 
+// Sort entries by beginning_address, when same then sort longest range first.
+// Three-way comparison used by sort(): returns a negative value if 'a' has to be placed before 'b', a positive value
+// if it has to be placed after 'b', and 0 if both entries cover exactly the same range.
+int DwarfFile::ArangesCache::compare_aranges_entries(const ArangesEntry& a, const ArangesEntry& b) {
+  if (a.beginning_address < b.beginning_address) {
+    return -1;
+  } else if (a.beginning_address > b.beginning_address) {
+    return 1;
+  }
+
+  // Same start address: the longer range goes first, hence the inverted result.
+  const uintptr_t len_a = a.end_address - a.beginning_address;
+  const uintptr_t len_b = b.end_address - b.beginning_address;
+  if (len_a < len_b) {
+    return 1;
+  } else if (len_a > len_b) {
+    return -1;
+  }
+  return 0;
+}
+
+// Sorts the first _count entries with compare_aranges_entries(). Required before find_compilation_unit_offset() can
+// binary search the entries.
+void DwarfFile::ArangesCache::sort() {
+  QuickSort::sort(_entries, _count, compare_aranges_entries);
+}
+
+// Appends the range [beginning_address, beginning_address + range_length) of 'descriptor' together with the
+// .debug_info offset of its set. The array is grown first if it is full.
+// Returns false, leaving the cache unchanged, if the array could not be grown.
+bool DwarfFile::ArangesCache::add_entry(const AddressDescriptor& descriptor, uint32_t debug_info_offset) {
+  if (_count >= _capacity && !grow()) {
+    return false;
+  }
+  _entries[_count] = ArangesEntry(
+    descriptor.beginning_address,
+    descriptor.beginning_address + descriptor.range_length,
+    debug_info_offset
+  );
+  _count++;
+  return true;
+}
+
+// Enlarges _entries by about 50%, but to at least 128 entries. Existing entries are preserved.
+// Returns false, leaving the cache unchanged, if the reallocation failed.
+bool DwarfFile::ArangesCache::grow() {
+  // Use integer arithmetic and enforce a minimum: '_capacity * 1.5' is evaluated in floating point and truncated, so
+  // a capacity of 1 would stay 1 and add_entry() would then write behind the end of _entries. With the minimum in
+  // place the new capacity is always strictly larger than the old one.
+  const size_t min_capacity = 128;
+  const size_t new_capacity = _capacity < min_capacity ? min_capacity : _capacity + _capacity / 2;
+  ArangesEntry* new_entries = REALLOC_C_HEAP_ARRAY_RETURN_NULL(ArangesEntry, _entries, new_capacity, mtInternal);
+  if (new_entries == nullptr) {
+    // _entries is not reassigned here, so the caller still owns the old array and releases it via destroy().
+    return false;
+  }
+  _entries = new_entries;
+  _capacity = new_capacity;
+  return true;
+}
+
+// Binary search for an entry whose range [beginning_address, end_address) contains 'offset_in_library'.
+// On success stores the entry's .debug_info offset in '*compilation_unit_offset' and returns true. Returns false,
+// leaving '*compilation_unit_offset' untouched, if the cache is not initialized, is empty, or no entry was found.
+// NOTE(review): the search only inspects the probed entries, so it relies on the sorted ranges not overlapping. If a
+// range is nested inside a longer one, an address covered only by the longer range can be missed - confirm that
+// .debug_aranges never contains such ranges or add a fallback.
+bool DwarfFile::ArangesCache::find_compilation_unit_offset(uint32_t offset_in_library, uint32_t* compilation_unit_offset) const {
+  if (!_initialized || _entries == nullptr || _count == 0) {
+    return false;
+  }
+
+  // Invariant: a matching entry, if any, is located in [left, right).
+  size_t left = 0;
+  size_t right = _count;
+  while (left < right) {
+    const size_t mid = left + (right - left) / 2;
+    const ArangesEntry& entry = _entries[mid];
+    if (offset_in_library < entry.beginning_address) {
+      right = mid;
+    } else if (offset_in_library >= entry.end_address) {
+      left = mid + 1;
+    } else {
+      *compilation_unit_offset = entry.debug_info_offset;
+      return true;
+    }
+  }
+  return false;
+}
+
 // Find the .debug_line offset for the line number program by reading from the .debug_abbrev and .debug_info section.
bool DwarfFile::CompilationUnit::find_debug_line_offset(uint32_t* debug_line_offset) { // (3a,b) diff --git a/src/hotspot/share/utilities/elfFile.hpp b/src/hotspot/share/utilities/elfFile.hpp index 979fac5edfc..1298892533e 100644 --- a/src/hotspot/share/utilities/elfFile.hpp +++ b/src/hotspot/share/utilities/elfFile.hpp @@ -72,6 +72,7 @@ typedef Elf32_Sym Elf_Sym; #include "memory/allocation.hpp" #include "utilities/checkedCast.hpp" #include "utilities/decoder.hpp" +#include "utilities/quickSort.hpp" #ifdef ASSERT // Helper macros to print different log levels during DWARF parsing @@ -94,10 +95,10 @@ typedef Elf32_Sym Elf_Sym; #define DWARF_LOG_TRACE(format, ...) #endif +class DwarfFile; +class ElfFuncDescTable; class ElfStringTable; class ElfSymbolTable; -class ElfFuncDescTable; -class DwarfFile; // ELF section, may or may not have cached data class ElfSection { @@ -201,6 +202,7 @@ class ElfFile: public CHeapObj { bool get_source_info(uint32_t offset_in_library, char* filename, size_t filename_len, int* line, bool is_pc_after_call); + DEBUG_ONLY(const char* filepath() const { return _filepath; }) private: // sanity check, if the file is a real elf file static bool is_elf_file(Elf_Ehdr&); @@ -397,7 +399,6 @@ class ElfFile: public CHeapObj { * - Complete information about intermediate states/results when parsing the DWARF file. */ class DwarfFile : public ElfFile { - static constexpr uint8_t ADDRESS_SIZE = NOT_LP64(4) LP64_ONLY(8); // We only support 32-bit DWARF (emitted by GCC) which uses 32-bit values for DWARF section lengths and offsets // relative to the beginning of a section. @@ -435,6 +436,63 @@ class DwarfFile : public ElfFile { bool read_non_null_char(char* result); }; + // Address descriptor defining a range that is covered by a compilation unit. It is defined in section 6.1.2 after + // the set header in the DWARF 4 spec. 
+  struct AddressDescriptor {
+    uintptr_t beginning_address = 0;
+    uintptr_t range_length = 0;
+  };
+
+  // Entry in ArangesCache, corresponding to an entry in .debug_aranges section.
+  // Describes the half-open range [beginning_address, end_address) and the .debug_info offset stored with it.
+  struct ArangesEntry {
+    uintptr_t beginning_address;
+    uintptr_t end_address;        // Exclusive, i.e. beginning_address + length of the range.
+    uint32_t debug_info_offset;
+
+    ArangesEntry() : beginning_address(0), end_address(0), debug_info_offset(0) {}
+    ArangesEntry(uintptr_t begin, uintptr_t end, uint32_t offset)
+      : beginning_address(begin), end_address(end), debug_info_offset(offset) {}
+  };
+
+  // Cache for .debug_aranges to enable binary search for address lookup.
+  // DebugAranges uses this cache to resolve the compilation_unit_offset, rather than doing a linear scan on the files
+  // in each invocation of DebugAranges::find_compilation_unit_offset.
+  // The cache owns the C heap array '_entries' and releases it in destroy() and in the destructor, which is why it
+  // cannot be copied.
+  struct ArangesCache {
+    ArangesEntry* _entries;   // C heap array with room for _capacity entries, or nullptr.
+    size_t _count;            // Number of entries in use.
+    size_t _capacity;         // Number of entries _entries has room for.
+    bool _initialized;        // Set by DebugAranges::ensure_cached() once the cache has been built.
+    bool _failed;             // Set by destroy(true); DebugAranges::ensure_cached() then reports FAILED.
+
+    ArangesCache() : _entries(nullptr), _count(0), _capacity(0), _initialized(false), _failed(false) {}
+    ArangesCache(const ArangesCache&) = delete;
+    ArangesCache& operator=(const ArangesCache&) = delete;
+    ~ArangesCache() {
+      this->free();
+    }
+
+    // Releases the entries and resets the cache to its empty, not yet built state. 'failed' records whether the cache
+    // is to be treated as failed from now on.
+    void destroy(bool failed) {
+      this->free();
+      _count = 0;
+      _capacity = 0;
+      // A cache without entries must not keep claiming to be built, otherwise valid() would report true for it.
+      _initialized = false;
+      _failed = failed;
+    }
+    // Looks up the entry containing 'offset_in_library' and, if found, stores its .debug_info offset in
+    // '*compilation_unit_offset'. Returns whether an entry was found.
+    bool find_compilation_unit_offset(uint32_t offset_in_library, uint32_t* compilation_unit_offset) const;
+    // True if the cache has been built and has not been marked as failed.
+    bool valid() const { return _initialized && !_failed; }
+    // Appends the range described by 'descriptor'. Returns false if the entry could not be stored.
+    bool add_entry(const AddressDescriptor& descriptor, uint32_t debug_info_offset);
+    // Sorts the entries as required by find_compilation_unit_offset().
+    void sort();
+
+   private:
+    static int compare_aranges_entries(const ArangesEntry& a, const ArangesEntry& b);
+    bool grow();
+    // Releases _entries, if any. Called as 'this->free()' to make clear that the member function is meant.
+    void free() {
+      if (_entries != nullptr) {
+        FREE_C_HEAP_ARRAY(ArangesEntry, _entries);
+        _entries = nullptr;
+      }
+    }
+  };
+
   // (2) Processing the .debug_aranges section to find the compilation unit which covers offset_in_library.
   // This is specified in section 6.1.2 of the DWARF 4 spec.
// @@ -475,16 +533,22 @@ class DwarfFile : public ElfFile { uint8_t _segment_size; }; - // Address descriptor defining a range that is covered by a compilation unit. It is defined in section 6.1.2 after - // the set header in the DWARF 4 spec. - struct AddressDescriptor { - uintptr_t beginning_address = 0; - uintptr_t range_length = 0; + enum class CacheHint { + // Do not retry as linear scan won't be able to read this either. + FAILED, + + // Cache is usable, no need to fall back to linear scan. + VALID, + + // Cache is unusable, possible reasons are C heap allocation failures. Fall back to linear scan. + TRY_LINEAR_SCAN, }; DwarfFile* _dwarf_file; + ArangesCache _cache; MarkedDwarfFileReader _reader; uintptr_t _section_start_address; + uintptr_t _size_bytes; // a calculated end position long _entry_end; @@ -499,9 +563,9 @@ class DwarfFile : public ElfFile { const AddressDescriptor& descriptor); public: DebugAranges(DwarfFile* dwarf_file) : _dwarf_file(dwarf_file), _reader(dwarf_file->fd()), - _section_start_address(0), _entry_end(0) {} + _section_start_address(0), _size_bytes(0), _entry_end(0) {} bool find_compilation_unit_offset(uint32_t offset_in_library, uint32_t* compilation_unit_offset); - + CacheHint ensure_cached(); }; // (3a-c,e) The compilation unit is read from the .debug_info section. The structure of .debug_info is shown in the @@ -884,7 +948,8 @@ class DwarfFile : public ElfFile { }; public: - DwarfFile(const char* filepath) : ElfFile(filepath) {} + DwarfFile(const char* filepath) : ElfFile(filepath), _debug_aranges(this) { + } /* * Starting point of reading line number and filename information from the DWARF file. @@ -897,6 +962,9 @@ class DwarfFile : public ElfFile { * More details about the different phases can be found at the associated methods. 
*/ bool get_filename_and_line_number(uint32_t offset_in_library, char* filename, size_t filename_len, int* line, bool is_pc_after_call); + + private: + DebugAranges _debug_aranges; }; #endif // !_WINDOWS && !__APPLE__