LibDebug: Parse line number information from DWARF format

DWARF line number information, if generated, is stored in the
.debug_line section of an object file.

The information is encoded as instructions for a VM that is defined in
the DWARF specification.
By executing these instructions, we can extract the encoded line number
information.
This commit is contained in:
Itamar 2020-04-19 23:04:43 +03:00 committed by Andreas Kling
parent edaa9c06d9
commit 8a886e0e96
5 changed files with 596 additions and 0 deletions

View file

@ -317,6 +317,95 @@ public:
return *this;
}
// Copies exactly `size` bytes from the current offset into `raw_data` and
// advances past them. If fewer than `size` bytes remain, nothing is copied,
// the offset is left untouched and the read-failure flag is raised instead.
BufferStream& read_raw(u8* raw_data, size_t size)
{
    const bool fits_in_buffer = m_offset + size <= m_buffer.size();
    if (fits_in_buffer) {
        __builtin_memcpy(raw_data, m_buffer.data() + m_offset, size);
        m_offset += size;
    } else {
        m_read_failure = true;
    }
    return *this;
}
// Returns the byte at the current offset without consuming it. When the
// offset is at or past the end of the buffer, raises the read-failure flag
// and returns 0.
u8 peek()
{
    if (m_offset < m_buffer.size())
        return m_buffer[m_offset];
    m_read_failure = true;
    return 0;
}
// Reads a NUL-terminated string starting at the current offset into `str`
// and advances past its terminator.
//
// If the stream is already exhausted, `str` is left untouched and the
// read-failure flag is raised. If the buffer ends before a terminator is
// found, `str` receives the bytes that were available, the offset stops at
// the end of the buffer (never beyond it, so at_end() stays meaningful) and
// the read-failure flag is raised.
BufferStream& operator>>(String& str)
{
    if (m_offset >= m_buffer.size()) {
        m_read_failure = true;
        return *this;
    }
    size_t string_size = 0;
    while (m_offset + string_size < m_buffer.size() && m_buffer[m_offset + string_size]) {
        ++string_size;
    }
    str = String(reinterpret_cast<const char*>(&m_buffer[m_offset]), string_size);
    m_offset += string_size;
    if (m_offset >= m_buffer.size()) {
        // The scan stopped because the buffer ran out, not because of a NUL:
        // there is no terminator to skip.
        m_read_failure = true;
        return *this;
    }
    // Skip the terminating NUL.
    ++m_offset;
    return *this;
}
// Decodes an unsigned LEB128 value (a variable-length integer encoding:
// 7 payload bits per byte, least-significant group first, with the high bit
// of each byte set when more bytes follow) into `result`.
//
// If the buffer ends before the final byte of the encoding, the read-failure
// flag is raised and `result` holds the bits decoded so far. Payload bits
// that do not fit into a size_t are dropped.
BufferStream& read_LEB128_unsigned(size_t& result)
{
    constexpr size_t result_bits = sizeof(size_t) * 8;
    result = 0;
    size_t shift = 0;
    while (true) {
        // `>=`, not `>`: an offset equal to the size is already out of bounds.
        if (m_offset >= m_buffer.size()) {
            m_read_failure = true;
            break;
        }
        const u8 byte = m_buffer[m_offset];
        ++m_offset;
        // Shifting by the full width of the type (or more) is undefined.
        if (shift < result_bits)
            result |= static_cast<size_t>(byte & 0x7f) << shift;
        if (!(byte & 0x80))
            break;
        shift += 7;
    }
    return *this;
}
// Decodes a signed LEB128 value (a variable-length integer encoding:
// 7 payload bits per byte, least-significant group first, with the high bit
// of each byte set when more bytes follow; bit 6 of the final byte is the
// sign) into `result`.
//
// If the buffer ends before the final byte of the encoding, the read-failure
// flag is raised and `result` holds the bits decoded so far without sign
// extension. Payload bits that do not fit into a size_t are dropped.
BufferStream& read_LEB128_signed(ssize_t& result)
{
    constexpr size_t result_bits = sizeof(size_t) * 8;
    size_t value = 0;
    size_t shift = 0;
    u8 byte = 0;
    result = 0;
    do {
        // `>=`, not `>`: an offset equal to the size is already out of bounds.
        if (m_offset >= m_buffer.size()) {
            m_read_failure = true;
            result = static_cast<ssize_t>(value);
            return *this;
        }
        byte = m_buffer[m_offset];
        ++m_offset;
        // Shifting by the full width of the type (or more) is undefined.
        if (shift < result_bits)
            value |= static_cast<size_t>(byte & 0x7f) << shift;
        shift += 7;
    } while (byte & 0x80);
    // Sign extend when the decoded groups do not already fill the whole
    // result and the sign bit of the final byte is set.
    if (shift < result_bits && (byte & 0x40))
        value |= static_cast<size_t>(-1) << shift;
    result = static_cast<ssize_t>(value);
    return *this;
}
// Skips `amount` bytes. If that would move the offset past the end of the
// buffer, the offset is left unchanged and the read-failure flag is raised.
BufferStream& advance(size_t amount)
{
    const bool within_bounds = m_offset + amount <= m_buffer.size();
    if (within_bounds)
        m_offset += amount;
    else
        m_read_failure = true;
    return *this;
}
bool at_end() const
{
return m_offset == m_buffer.size();

View file

@ -0,0 +1,91 @@
/*
* Copyright (c) 2020, Itamar S. <itamar8910@gmail.com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "DebugInfo.h"
#include <AK/QuickSort.h>
// Takes shared ownership of `elf` and immediately builds the sorted line
// table from it.
DebugInfo::DebugInfo(NonnullRefPtr<const ELF::Loader> elf)
    // `elf` is a by-value sink parameter: move it into the member instead of
    // copying it a second time.
    : m_elf(move(elf))
{
    prepare_lines();
}
void DebugInfo::prepare_lines()
{
auto section = m_elf->image().lookup_section(".debug_line");
ASSERT(!section.is_undefined());
auto buffer = ByteBuffer::wrap(reinterpret_cast<const u8*>(section.raw_data()), section.size());
BufferStream stream(buffer);
Vector<LineProgram::LineInfo> all_lines;
while (!stream.at_end()) {
LineProgram program(stream);
all_lines.append(move(program.lines()));
}
for (auto& line_info : all_lines) {
String file_path = line_info.file;
if (file_path.contains("Toolchain/"))
continue;
if (file_path.contains("serenity/")) {
auto start_index = file_path.index_of("serenity/").value() + String("serenity/").length();
file_path = file_path.substring(start_index, file_path.length() - start_index);
}
m_sorted_lines.append({ line_info.address, file_path, line_info.line });
}
quick_sort(m_sorted_lines, [](auto& a, auto& b) {
return a.address < b.address;
});
}
// Returns the source position of the line entry that covers `target_address`:
// the entry directly preceding the first entry whose address is greater than
// `target_address`.
//
// Returns an empty Optional when the table is empty, when `target_address`
// lies before the first entry, or when no entry has a greater address (i.e.
// `target_address` is at or beyond the last entry).
Optional<DebugInfo::SourcePosition> DebugInfo::get_source_position(u32 target_address) const
{
    if (m_sorted_lines.is_empty())
        return {};
    if (target_address < m_sorted_lines[0].address)
        return {};

    // m_sorted_lines is ordered by address, so binary-search for the first
    // index in [1, size) whose address exceeds target_address.
    size_t low = 1;
    size_t high = m_sorted_lines.size();
    while (low < high) {
        size_t middle = low + (high - low) / 2;
        if (m_sorted_lines[middle].address > target_address)
            high = middle;
        else
            low = middle + 1;
    }

    if (low == m_sorted_lines.size())
        return {};

    const auto& covering_entry = m_sorted_lines[low - 1];
    return Optional<SourcePosition>({ covering_entry.file, covering_entry.line });
}
// Returns the address of the first entry (in address order) that was
// generated for `line` of `file`, or an empty Optional if there is none.
// `file` must match the stored path exactly.
Optional<u32> DebugInfo::get_instruction_from_source(const String& file, size_t line) const
{
    for (const auto& line_entry : m_sorted_lines) {
        if (line_entry.file == file && line_entry.line == line)
            return Optional<u32>(line_entry.address);
    }
    return {};
}

View file

@ -0,0 +1,56 @@
/*
* Copyright (c) 2020, Itamar S. <itamar8910@gmail.com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <AK/NonnullRefPtr.h>
#include <AK/Optional.h>
#include <AK/Vector.h>
#include <LibELF/Loader.h>
#include <Libraries/LibDebug/Dwarf/LineProgram.h>
// Maps between instruction addresses and source locations, using the line
// entries produced by running LineProgram over an ELF's ".debug_line" section.
class DebugInfo {
public:
// Keeps a reference to `elf` and builds the line table during construction.
explicit DebugInfo(NonnullRefPtr<const ELF::Loader> elf);
// A location in source code: a file path plus a line number.
struct SourcePosition {
String file_path;
size_t line_number { 0 };
// Two positions are equal when both the path and the line number match.
bool operator==(const SourcePosition& other) const { return file_path == other.file_path && line_number == other.line_number; }
bool operator!=(const SourcePosition& other) const { return !(*this == other); }
};
// Looks up the source position for `address`; empty when none is known.
Optional<SourcePosition> get_source_position(u32 address) const;
// Looks up an instruction address generated for `line` of `file`; empty when
// there is no such entry.
Optional<u32> get_instruction_from_source(const String& file, size_t line) const;
private:
// Fills m_sorted_lines from the ELF's line number information.
void prepare_lines();
NonnullRefPtr<const ELF::Loader> m_elf;
// Line entries ordered by ascending address.
Vector<LineProgram::LineInfo> m_sorted_lines;
};

View file

@ -0,0 +1,247 @@
/*
* Copyright (c) 2020, Itamar S. <itamar8910@gmail.com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "LineProgram.h"
// Parses and executes one line number program unit, starting at the current
// offset of `stream`. The stream is held by reference and is advanced as the
// unit is consumed; the decoded entries are available through lines().
LineProgram::LineProgram(BufferStream& stream)
: m_stream(stream)
{
// Remember where this unit begins; run_program() uses it to find the unit's end.
m_unit_offset = m_stream.offset();
// Each step below consumes the stream sequentially, so the order matters:
// header, then the directory table, then the file table, then the opcodes.
parse_unit_header();
parse_source_directories();
parse_source_files();
run_program();
}
// Reads the fixed-size unit header straight from the stream into
// m_unit_header and asserts that it describes the only layout this parser
// supports: the expected DWARF version and the expected number of standard
// opcodes.
void LineProgram::parse_unit_header()
{
    m_stream.read_raw((u8*)&m_unit_header, sizeof(m_unit_header));
    ASSERT(m_unit_header.version == DWARF_VERSION);
    ASSERT(m_unit_header.opcode_base == SPECIAL_OPCODES_BASE);
#ifdef DWARF_DEBUG
    dbg() << "unit length: " << m_unit_header.length;
#endif
}
void LineProgram::parse_source_directories()
{
m_source_directories.append(".");
while (m_stream.peek()) {
String directory;
m_stream >> directory;
#ifdef DWARF_DEBUG
dbg() << "directory: " << directory;
#endif
m_source_directories.append(move(directory));
}
m_stream.advance(1);
}
void LineProgram::parse_source_files()
{
m_source_files.append({ ".", 0 });
while (m_stream.peek()) {
String file_name;
m_stream >> file_name;
size_t directory_index = 0;
m_stream.read_LEB128_unsigned(directory_index);
size_t _unused = 0;
m_stream.read_LEB128_unsigned(_unused); // skip modification time
m_stream.read_LEB128_unsigned(_unused); // skip file size
#ifdef DWARF_DEBUG
dbg() << "file: " << file_name << ", directory index: " << directory_index;
#endif
m_source_files.append({ file_name, directory_index });
}
m_stream.advance(1);
ASSERT(!m_stream.handle_read_failure());
}
// Emits a line entry for the current register state (address, file, line).
// Nothing is emitted while the "is statement" register is false. The stored
// path is "<directory>/<file name>", resolved through the directory and file
// tables.
void LineProgram::append_to_line_info()
{
#ifdef DWARF_DEBUG
    dbg() << "appending line info: " << (void*)m_address << ", " << m_source_files[m_file_index].name << ":" << m_line;
#endif
    if (!m_is_statement)
        return;
    String directory = m_source_directories[m_source_files[m_file_index].directory_index];
    String full_path = String::format("%s/%s", directory.characters(), m_source_files[m_file_index].name.characters());
    m_lines.append({ m_address, full_path, m_line });
}
void LineProgram::reset_registers()
{
m_address = 0;
m_line = 1;
m_file_index = 1;
m_is_statement = m_unit_header.default_is_stmt == 1;
}
void LineProgram::handle_extended_opcode()
{
size_t length = 0;
m_stream.read_LEB128_unsigned(length);
u8 sub_opcode = 0;
m_stream >> sub_opcode;
switch (sub_opcode) {
case ExtendedOpcodes::EndSequence: {
append_to_line_info();
reset_registers();
break;
}
case ExtendedOpcodes::SetAddress: {
ASSERT(length == sizeof(size_t) + 1);
m_stream >> m_address;
#ifdef DWARF_DEBUG
dbg() << "SetAddress: " << (void*)address;
#endif
break;
}
case ExtendedOpcodes::SetDiscriminator: {
#ifdef DWARF_DEBUG
dbg() << "SetDiscriminator";
#endif
m_stream.advance(1);
break;
}
default:
#ifdef DWARF_DEBUG
dbg() << "offset: " << (void*)m_stream.offset();
#endif
ASSERT_NOT_REACHED();
}
}
// Executes one standard opcode (1 up to, but not including,
// SPECIAL_OPCODES_BASE), reading its operands from the stream and updating
// the virtual machine registers.
//
// Opcodes that only affect state this parser does not track (column, basic
// block, prologue/epilogue markers, ISA) still have their operands consumed
// so that the stream stays in sync.
void LineProgram::handle_standard_opcode(u8 opcode)
{
    switch (opcode) {
    case StandardOpcodes::Copy: {
        append_to_line_info();
        break;
    }
    case StandardOpcodes::AdvancePc: {
        size_t operand = 0;
        m_stream.read_LEB128_unsigned(operand);
        size_t delta = operand * m_unit_header.min_instruction_length;
#ifdef DWARF_DEBUG
        dbg() << "AdvnacePC by: " << delta << " to: " << (void*)(m_address + delta);
#endif
        m_address += delta;
        break;
    }
    case StandardOpcodes::SetFile: {
        size_t new_file_index = 0;
        m_stream.read_LEB128_unsigned(new_file_index);
#ifdef DWARF_DEBUG
        dbg() << "SetFile: new file index: " << new_file_index;
#endif
        m_file_index = new_file_index;
        break;
    }
    case StandardOpcodes::SetColumn: {
        // Columns are not tracked; just consume the operand.
#ifdef DWARF_DEBUG
        dbg() << "SetColumn";
#endif
        size_t new_column = 0;
        m_stream.read_LEB128_unsigned(new_column);
        break;
    }
    case StandardOpcodes::AdvanceLine: {
        ssize_t line_delta = 0;
        m_stream.read_LEB128_signed(line_delta);
        // The line register is unsigned; refuse to move it below zero.
        ASSERT(line_delta >= 0 || m_line >= (size_t)(-line_delta));
        m_line += line_delta;
#ifdef DWARF_DEBUG
        dbg() << "AdvanceLine: " << m_line;
#endif
        break;
    }
    case StandardOpcodes::NegateStatement: {
#ifdef DWARF_DEBUG
        dbg() << "NegateStatement";
#endif
        m_is_statement = !m_is_statement;
        break;
    }
    case StandardOpcodes::SetBasicBlock:
    case StandardOpcodes::SetProlougeEnd:
    case StandardOpcodes::SetEpilogueBegin: {
        // These take no operands and only set flags that are not tracked here.
        break;
    }
    case StandardOpcodes::ConstAddPc: {
        // Advances the address by the same amount special opcode 255 would,
        // without touching the line register or emitting a row.
        u8 adjusted_opcode = 255 - SPECIAL_OPCODES_BASE;
        ssize_t address_increment = (adjusted_opcode / m_unit_header.line_range) * m_unit_header.min_instruction_length;
#ifdef DWARF_DEBUG
        dbg() << "ConstAddPc: advance pc by: " << address_increment << " to: " << (m_address + address_increment);
#endif
        m_address += address_increment;
        break;
    }
    case StandardOpcodes::FixAdvancePc: {
        // The operand is a plain 16-bit value (not LEB128) and is not scaled
        // by min_instruction_length. It is read in the stream's native byte
        // order, the same way the unit header is read.
        u16 delta = 0;
        m_stream.read_raw(reinterpret_cast<u8*>(&delta), sizeof(delta));
#ifdef DWARF_DEBUG
        dbg() << "FixAdvancePc by: " << delta;
#endif
        m_address += delta;
        break;
    }
    case StandardOpcodes::SetIsa: {
        // The ISA register is not tracked; just consume the operand.
        size_t isa = 0;
        m_stream.read_LEB128_unsigned(isa);
        break;
    }
    default:
        ASSERT_NOT_REACHED();
    }
}
// Executes one special opcode (SPECIAL_OPCODES_BASE and above). A special
// opcode encodes an address advance and a line advance in a single byte,
// derived from the header's line_base, line_range and min_instruction_length,
// and always emits a row afterwards.
void LineProgram::handle_sepcial_opcode(u8 opcode)
{
    u8 adjusted_opcode = opcode - SPECIAL_OPCODES_BASE;
    ssize_t address_increment = (adjusted_opcode / m_unit_header.line_range) * m_unit_header.min_instruction_length;
    // line_base is signed, so the line may move backwards.
    ssize_t line_increment = m_unit_header.line_base + (adjusted_opcode % m_unit_header.line_range);
    m_address += address_increment;
    m_line += line_increment;
#ifdef DWARF_DEBUG
    dbg() << "Special adjusted_opcode: " << adjusted_opcode << ", delta_address: " << address_increment << ", delta_line: " << line_increment;
    dbg() << "Address is now:" << (void*)m_address << ", and line is: " << m_source_files[m_file_index].name << ":" << m_line;
#endif
    append_to_line_info();
}
void LineProgram::run_program()
{
reset_registers();
while ((size_t)m_stream.offset() < m_unit_offset + sizeof(u32) + m_unit_header.length) {
u8 opcode = 0;
m_stream >> opcode;
#ifdef DWARF_DEBUG
dbg() << (void*)(m_stream.offset() - 1) << ": opcode: " << opcode;
#endif
if (opcode == 0) {
handle_extended_opcode();
} else if (opcode >= 1 && opcode <= 12) {
handle_standard_opcode(opcode);
} else {
handle_sepcial_opcode(opcode);
}
}
}

View file

@ -0,0 +1,113 @@
/*
* Copyright (c) 2020, Itamar S. <itamar8910@gmail.com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <AK/BufferStream.h>
#include <AK/Vector.h>
// Decodes one unit of DWARF line number information by executing its "line
// program" on a small virtual machine, collecting the entries it emits.
class LineProgram {
public:
// Parses and runs a whole unit from `stream`, starting at its current offset.
// The stream is held by reference and advanced while the unit is consumed.
explicit LineProgram(BufferStream& stream);
// One emitted entry: the instruction at `address` belongs to `line` of `file`.
struct LineInfo {
u32 address { 0 };
String file;
size_t line { 0 };
};
// The entries emitted while the program ran, in emission order.
const Vector<LineInfo>& lines() const { return m_lines; }
private:
void parse_unit_header();
void parse_source_directories();
void parse_source_files();
void run_program();
void append_to_line_info();
void reset_registers();
void handle_extended_opcode();
void handle_standard_opcode(u8 opcode);
void handle_sepcial_opcode(u8 opcode);
// The unit header, packed because it is filled by a raw read from the stream.
struct [[gnu::packed]] UnitHeader32
{
// Unit size in bytes, not counting this field itself.
u32 length;
u16 version;
u32 header_length;
// Scale factor applied to address advances.
u8 min_instruction_length;
// Initial value of the "is statement" register.
u8 default_is_stmt;
// line_base and line_range parameterize how special opcodes advance the line.
i8 line_base;
u8 line_range;
// Value of the first special opcode.
u8 opcode_base;
u8 std_opcode_lengths[12];
};
// Standard opcodes, numbered from 1.
enum StandardOpcodes {
Copy = 1,
AdvancePc,
AdvanceLine,
SetFile,
SetColumn,
NegateStatement,
SetBasicBlock,
ConstAddPc,
FixAdvancePc,
SetProlougeEnd,
SetEpilogueBegin,
SetIsa
};
// Sub-opcodes of the extended opcode (which is introduced by a zero byte).
enum ExtendedOpcodes {
EndSequence = 1,
SetAddress,
DefineFile,
SetDiscriminator,
};
// An entry of the file table: a name and the index of its directory.
struct FileEntry {
String name;
size_t directory_index { 0 };
};
// The only header layout accepted by parse_unit_header().
static constexpr u16 DWARF_VERSION = 3;
static constexpr u8 SPECIAL_OPCODES_BASE = 13;
BufferStream& m_stream;
// Stream offset at which this unit's header starts.
size_t m_unit_offset { 0 };
UnitHeader32 m_unit_header {};
Vector<String> m_source_directories;
Vector<FileEntry> m_source_files;
// The registers of the "line program" virtual machine
u32 m_address { 0 };
size_t m_line { 0 };
size_t m_file_index { 0 };
bool m_is_statement { false };
Vector<LineInfo> m_lines;
};