8334866: Improve Speed of ElfDecoder source search

Reviewed-by: shade, chagedorn
This commit is contained in:
Kerem Kat 2025-11-05 08:33:14 +00:00 committed by Aleksey Shipilev
parent a0e70c4e94
commit dddfcd03aa
2 changed files with 225 additions and 13 deletions

View File

@ -684,9 +684,8 @@ bool ElfFile::create_new_dwarf_file(const char* filepath) {
// Starting point of reading line number and filename information from the DWARF file.
bool DwarfFile::get_filename_and_line_number(const uint32_t offset_in_library, char* filename, const size_t filename_len,
int* line, const bool is_pc_after_call) {
DebugAranges debug_aranges(this);
uint32_t compilation_unit_offset = 0; // 4-bytes for 32-bit DWARF
if (!debug_aranges.find_compilation_unit_offset(offset_in_library, &compilation_unit_offset)) {
if (!_debug_aranges.find_compilation_unit_offset(offset_in_library, &compilation_unit_offset)) {
DWARF_LOG_ERROR("Failed to find .debug_info offset for the compilation unit.");
return false;
}
@ -708,11 +707,87 @@ bool DwarfFile::get_filename_and_line_number(const uint32_t offset_in_library, c
return true;
}
// Build a sorted cache of all address ranges found in .debug_aranges so that lookups can use binary search.
//
// Returns:
//   CacheHint::VALID            The cache has been built (it may be empty) and can be queried.
//   CacheHint::TRY_LINEAR_SCAN  The cache could not be built because a C heap allocation failed. The reader is moved
//                               back to the position it had on entry and the cache is left in its fresh state, so the
//                               caller can fall back to a linear scan now and on later lookups.
//   CacheHint::FAILED           The section header or an address descriptor could not be read. This is recorded in the
//                               cache so that later calls return FAILED right away.
DwarfFile::DebugAranges::CacheHint DwarfFile::DebugAranges::ensure_cached() {
  if (_cache._failed) {
    return CacheHint::FAILED;
  }
  if (_cache._initialized) {
    return CacheHint::VALID;
  }

  assert(_cache._capacity == 0, "need fresh cache");
  assert(_cache._count == 0, "need fresh cache");

  // Remember where the reader was so that it can be restored before handing over to the linear scan.
  const long pos = _reader.get_position();
  if (!read_section_header()) {
    _cache.destroy(true);
    return CacheHint::FAILED;
  }

  // Start with reasonable initial capacity to minimize number of grow/realloc calls.
  // Assume ~3% of the .debug_aranges is DebugArangesSetHeader and the rest is made up of AddressDescriptors.
  const uintptr_t estimated_set_header_size = _size_bytes / 32;
  const size_t initial_capacity = (_size_bytes - estimated_set_header_size) / sizeof(AddressDescriptor);
  _cache._entries = NEW_C_HEAP_ARRAY_RETURN_NULL(ArangesEntry, initial_capacity, mtInternal);
  if (_cache._entries == nullptr) {
    // An allocation failure says nothing about the readability of the section. Do not mark the cache as failed,
    // otherwise every later lookup would return FAILED without ever trying the linear scan.
    _cache.destroy(false);
    _reader.set_position(pos);
    return CacheHint::TRY_LINEAR_SCAN;
  }
  _cache._capacity = initial_capacity;
  _cache._count = 0;

  // Read all sets and their descriptors.
  while (_reader.has_bytes_left()) {
    DebugArangesSetHeader set_header;
    if (!read_set_header(set_header)) {
      // Stop at the first set header that cannot be read and keep the entries collected so far.
      break;
    }

    // Read all address descriptors for this set into the cache.
    AddressDescriptor descriptor;
    do {
      if (!read_address_descriptor(descriptor)) {
        _cache.destroy(true);
        return CacheHint::FAILED;
      }
      // Terminating entries and zero-length ranges cannot contain any address and are not cached.
      if (!is_terminating_entry(set_header, descriptor) && descriptor.range_length > 0 &&
          !_cache.add_entry(descriptor, set_header._debug_info_offset)) {
        // Growing the cache failed (C heap allocation). As above, keep the cache fresh instead of failed so that
        // the linear scan stays reachable for later lookups as well.
        _cache.destroy(false);
        _reader.set_position(pos);
        return CacheHint::TRY_LINEAR_SCAN;
      }
    } while (!is_terminating_entry(set_header, descriptor) && _reader.has_bytes_left());
  }

  if (_cache._count == 0) {
    // No entries found, unusual but still valid. Release the unused buffer and remember the (empty) result so that
    // the section is not parsed again on every lookup. Lookups on an empty cache report "not found".
    _cache.destroy(false);
    _cache._initialized = true;
    return CacheHint::VALID;
  }

  _cache.sort();
  _cache._initialized = true;
  DWARF_LOG_INFO("Built .debug_aranges cache for '%s' with %zu entries", this->_dwarf_file->filepath(), _cache._count);
  return CacheHint::VALID;
}
// (2) The .debug_aranges section contains a number of entries/sets. Each set contains one or multiple address range descriptors of the
// form [beginning_address, beginning_address+length). Start reading these sets and their descriptors until we find one that contains
// 'offset_in_library'. Read the debug_info_offset field from the header of this set which defines the offset for the compilation unit.
// This process is described in section 6.1.2 of the DWARF 4 spec.
bool DwarfFile::DebugAranges::find_compilation_unit_offset(const uint32_t offset_in_library, uint32_t* compilation_unit_offset) {
switch (ensure_cached()) {
case CacheHint::VALID:
return _cache.find_compilation_unit_offset(offset_in_library, compilation_unit_offset);
case CacheHint::TRY_LINEAR_SCAN:
break;
case CacheHint::FAILED:
return false;
}
// Fall back to linear scan if building of the cache failed, which can happen
// if there are C heap allocation errors.
DWARF_LOG_INFO("Falling back to linear scan of .debug_aranges for '%s'", _dwarf_file->filepath());
if (!read_section_header()) {
DWARF_LOG_ERROR("Failed to read a .debug_aranges header.");
return false;
@ -750,6 +825,7 @@ bool DwarfFile::DebugAranges::read_section_header() {
}
_section_start_address = shdr.sh_offset;
_size_bytes = shdr.sh_size;
_reader.set_max_pos(shdr.sh_offset + shdr.sh_size);
return _reader.set_position(shdr.sh_offset);
}
@ -829,6 +905,74 @@ bool DwarfFile::DebugAranges::is_terminating_entry(const DwarfFile::DebugAranges
return is_terminating;
}
// Ordering used when sorting the cache: ascending by beginning_address. Entries starting at the same address are
// ordered by descending range length, i.e. the longest range comes first.
// Returns -1 if 'a' sorts before 'b', 1 if 'a' sorts after 'b', and 0 if both are equivalent.
int DwarfFile::ArangesCache::compare_aranges_entries(const ArangesEntry& a, const ArangesEntry& b) {
  if (a.beginning_address != b.beginning_address) {
    return a.beginning_address < b.beginning_address ? -1 : 1;
  }

  // Same start address: the longer range wins the earlier slot.
  const uintptr_t a_length = a.end_address - a.beginning_address;
  const uintptr_t b_length = b.end_address - b.beginning_address;
  if (a_length == b_length) {
    return 0;
  }
  return a_length > b_length ? -1 : 1;
}
void DwarfFile::ArangesCache::sort() {
QuickSort::sort(_entries, _count, compare_aranges_entries);
}
// Append the range [beginning_address, beginning_address + range_length) of 'descriptor' together with the
// 'debug_info_offset' of its set to the cache. The backing array is grown first if it is full.
// Returns false, leaving the cache unchanged, if the array is full and could not be grown.
bool DwarfFile::ArangesCache::add_entry(const AddressDescriptor& descriptor, uint32_t debug_info_offset) {
  if (_count >= _capacity) {
    if (!grow()) {
      return false;
    }
  }

  const uintptr_t range_begin = descriptor.beginning_address;
  const uintptr_t range_end = range_begin + descriptor.range_length;
  _entries[_count++] = ArangesEntry(range_begin, range_end, debug_info_offset);
  return true;
}
bool DwarfFile::ArangesCache::grow() {
size_t new_capacity = _capacity == 0 ? 128 : _capacity * 1.5;
ArangesEntry* new_entries = REALLOC_C_HEAP_ARRAY_RETURN_NULL(ArangesEntry, _entries, new_capacity, mtInternal);
if (new_entries == nullptr) {
return false;
}
_entries = new_entries;
_capacity = new_capacity;
return true;
}
bool DwarfFile::ArangesCache::find_compilation_unit_offset(uint32_t offset_in_library, uint32_t* compilation_unit_offset) const {
if (!_initialized || _entries == nullptr || _count == 0) {
return false;
}
size_t left = 0;
size_t right = _count;
while (left < right) {
size_t mid = left + (right - left) / 2;
const ArangesEntry& entry = _entries[mid];
if (offset_in_library < entry.beginning_address) {
right = mid;
} else if (offset_in_library >= entry.end_address) {
left = mid + 1;
} else {
*compilation_unit_offset = entry.debug_info_offset;
return true;
}
}
return false;
}
// Find the .debug_line offset for the line number program by reading from the .debug_abbrev and .debug_info section.
bool DwarfFile::CompilationUnit::find_debug_line_offset(uint32_t* debug_line_offset) {
// (3a,b)

View File

@ -72,6 +72,7 @@ typedef Elf32_Sym Elf_Sym;
#include "memory/allocation.hpp"
#include "utilities/checkedCast.hpp"
#include "utilities/decoder.hpp"
#include "utilities/quickSort.hpp"
#ifdef ASSERT
// Helper macros to print different log levels during DWARF parsing
@ -94,10 +95,10 @@ typedef Elf32_Sym Elf_Sym;
#define DWARF_LOG_TRACE(format, ...)
#endif
class DwarfFile;
class ElfFuncDescTable;
class ElfStringTable;
class ElfSymbolTable;
class ElfFuncDescTable;
class DwarfFile;
// ELF section, may or may not have cached data
class ElfSection {
@ -201,6 +202,7 @@ class ElfFile: public CHeapObj<mtInternal> {
bool get_source_info(uint32_t offset_in_library, char* filename, size_t filename_len, int* line, bool is_pc_after_call);
DEBUG_ONLY(const char* filepath() const { return _filepath; })
private:
// sanity check, if the file is a real elf file
static bool is_elf_file(Elf_Ehdr&);
@ -397,7 +399,6 @@ class ElfFile: public CHeapObj<mtInternal> {
* - Complete information about intermediate states/results when parsing the DWARF file.
*/
class DwarfFile : public ElfFile {
static constexpr uint8_t ADDRESS_SIZE = NOT_LP64(4) LP64_ONLY(8);
// We only support 32-bit DWARF (emitted by GCC) which uses 32-bit values for DWARF section lengths and offsets
// relative to the beginning of a section.
@ -435,6 +436,63 @@ class DwarfFile : public ElfFile {
bool read_non_null_char(char* result);
};
// One address range covered by a compilation unit, given as start address plus length. Address descriptors follow
// the set header in .debug_aranges, see section 6.1.2 of the DWARF 4 spec.
struct AddressDescriptor {
  uintptr_t beginning_address{0};
  uintptr_t range_length{0};
};
// A single element of ArangesCache: the address range [beginning_address, end_address) taken from the
// .debug_aranges section and the .debug_info offset stored alongside it.
struct ArangesEntry {
  uintptr_t beginning_address;
  uintptr_t end_address;
  uint32_t debug_info_offset;

  // An all-zero entry.
  ArangesEntry() : ArangesEntry(0, 0, 0) {}

  ArangesEntry(uintptr_t begin, uintptr_t end, uint32_t offset)
    : beginning_address(begin),
      end_address(end),
      debug_info_offset(offset) {}
};
// Sorted copy of the address ranges of the .debug_aranges section which allows looking up an address with a binary
// search. DebugAranges resolves the compilation_unit_offset through this cache instead of scanning the section
// linearly on every call of DebugAranges::find_compilation_unit_offset.
// The cache owns the C heap array '_entries' and releases it in destroy() and in the destructor; it cannot be copied.
struct ArangesCache {
  ArangesEntry* _entries = nullptr;
  size_t _count = 0;          // Number of entries in use.
  size_t _capacity = 0;       // Number of entries '_entries' has room for.
  bool _initialized = false;
  bool _failed = false;

  ArangesCache() = default;
  ArangesCache(const ArangesCache&) = delete;
  ArangesCache& operator=(const ArangesCache&) = delete;

  ~ArangesCache() {
    this->free();
  }

  // Release the entries and reset the counters. 'failed' is stored in '_failed'; '_initialized' is left as it is.
  void destroy(bool failed) {
    _failed = failed;
    _capacity = 0;
    _count = 0;
    this->free();
  }

  bool find_compilation_unit_offset(uint32_t offset_in_library, uint32_t* compilation_unit_offset) const;
  bool valid() const { return !_failed && _initialized; }
  bool add_entry(const AddressDescriptor& descriptor, uint32_t debug_info_offset);
  void sort();

 private:
  static int compare_aranges_entries(const ArangesEntry& a, const ArangesEntry& b);
  bool grow();

  // Release '_entries' if there is anything to release and forget the pointer.
  void free() {
    if (_entries == nullptr) {
      return;
    }
    FREE_C_HEAP_ARRAY(ArangesEntry, _entries);
    _entries = nullptr;
  }
};
// (2) Processing the .debug_aranges section to find the compilation unit which covers offset_in_library.
// This is specified in section 6.1.2 of the DWARF 4 spec.
//
@ -475,16 +533,22 @@ class DwarfFile : public ElfFile {
uint8_t _segment_size;
};
// Address descriptor defining a range that is covered by a compilation unit. It is defined in section 6.1.2 after
// the set header in the DWARF 4 spec.
struct AddressDescriptor {
uintptr_t beginning_address = 0;
uintptr_t range_length = 0;
// Result of ensure_cached(). find_compilation_unit_offset() uses it to decide whether to query the cache,
// to fall back to the linear scan, or to give up.
enum class CacheHint {
// Do not retry as linear scan won't be able to read this either.
FAILED,
// Cache is usable, no need to fall back to linear scan.
VALID,
// Cache is unusable, possible reasons are C heap allocation failures. Fall back to linear scan.
TRY_LINEAR_SCAN,
};
DwarfFile* _dwarf_file;
ArangesCache _cache;
MarkedDwarfFileReader _reader;
uintptr_t _section_start_address;
uintptr_t _size_bytes;
// a calculated end position
long _entry_end;
@ -499,9 +563,9 @@ class DwarfFile : public ElfFile {
const AddressDescriptor& descriptor);
public:
DebugAranges(DwarfFile* dwarf_file) : _dwarf_file(dwarf_file), _reader(dwarf_file->fd()),
_section_start_address(0), _entry_end(0) {}
_section_start_address(0), _size_bytes(0), _entry_end(0) {}
bool find_compilation_unit_offset(uint32_t offset_in_library, uint32_t* compilation_unit_offset);
CacheHint ensure_cached();
};
// (3a-c,e) The compilation unit is read from the .debug_info section. The structure of .debug_info is shown in the
@ -884,7 +948,8 @@ class DwarfFile : public ElfFile {
};
public:
DwarfFile(const char* filepath) : ElfFile(filepath) {}
DwarfFile(const char* filepath) : ElfFile(filepath), _debug_aranges(this) {
}
/*
* Starting point of reading line number and filename information from the DWARF file.
@ -897,6 +962,9 @@ class DwarfFile : public ElfFile {
* More details about the different phases can be found at the associated methods.
*/
bool get_filename_and_line_number(uint32_t offset_in_library, char* filename, size_t filename_len, int* line, bool is_pc_after_call);
private:
DebugAranges _debug_aranges;
};
#endif // !_WINDOWS && !__APPLE__