aboutsummaryrefslogtreecommitdiff
path: root/externals/breakpad/src/processor/disassembler_objdump.cc
diff options
context:
space:
mode:
Diffstat (limited to 'externals/breakpad/src/processor/disassembler_objdump.cc')
-rw-r--r--externals/breakpad/src/processor/disassembler_objdump.cc487
1 files changed, 487 insertions, 0 deletions
diff --git a/externals/breakpad/src/processor/disassembler_objdump.cc b/externals/breakpad/src/processor/disassembler_objdump.cc
new file mode 100644
index 0000000000..9f9569a5e6
--- /dev/null
+++ b/externals/breakpad/src/processor/disassembler_objdump.cc
@@ -0,0 +1,487 @@
+// Copyright (c) 2022, Google LLC
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// disassembler_objdump.cc: Disassembler that invokes objdump for disassembly.
+//
+// Author: Mark Brand
+
+#ifdef HAVE_CONFIG_H
+#include <config.h> // Must come first
+#endif
+
+#include "processor/disassembler_objdump.h"
+
+#include <unistd.h>
+#include <sys/wait.h>
+
+#include <array>
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <regex>
+#include <sstream>
+#include <vector>
+
+#include "common/linux/eintr_wrapper.h"
+#include "common/linux/scoped_pipe.h"
+#include "common/linux/scoped_tmpfile.h"
+#include "processor/logging.h"
+
+namespace google_breakpad {
+namespace {
+
+// Number of bytes read from memory and handed to objdump when disassembling a
+// single instruction; 15 bytes is the longest encoding an x86 instruction can
+// have.
+const size_t kMaxX86InstructionLength = 15;
+
+// Returns true when `token` is one of the instruction prefix mnemonics
+// ("lock", "rep", "repz", "repnz") that can precede the operation itself.
+bool IsInstructionPrefix(const string& token) {
+  return token == "lock" || token == "rep" || token == "repz" ||
+         token == "repnz";
+}
+
+// Returns true when `token` is one of the operand-size keywords that objdump
+// emits in front of a memory operand in Intel syntax (e.g. the "DWORD" and
+// "PTR" in "DWORD PTR [esi+0x10]"). Such tokens carry no addressing
+// information and are skipped by the tokenizer.
+//
+// Besides the integer sizes this also covers the far-pointer, x87 and vector
+// sizes (FWORD, TBYTE, OWORD, XMMWORD, YMMWORD, ZMMWORD); without them an
+// operand such as "XMMWORD PTR [rax]" would be mistaken for the operand
+// itself.
+bool IsOperandSize(const string& token) {
+  static const char* const kOperandSizeKeywords[] = {
+      "BYTE",  "WORD",  "DWORD",   "QWORD",   "FWORD",   "TBYTE",
+      "OWORD", "XMMWORD", "YMMWORD", "ZMMWORD", "PTR",
+  };
+
+  for (const char* keyword : kOperandSizeKeywords) {
+    if (token == keyword) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Looks up the segment register named `segment_name` ("ds", "es", "fs" or
+// "gs") in the x86 context and stores its value in `address`.
+// Returns false, after logging an error, for any other name; `address` is
+// left untouched in that case.
+bool GetSegmentAddressX86(const DumpContext& context, string segment_name,
+                          uint64_t& address) {
+  if (segment_name == "ds") {
+    address = context.GetContextX86()->ds;
+    return true;
+  }
+  if (segment_name == "es") {
+    address = context.GetContextX86()->es;
+    return true;
+  }
+  if (segment_name == "fs") {
+    address = context.GetContextX86()->fs;
+    return true;
+  }
+  if (segment_name == "gs") {
+    address = context.GetContextX86()->gs;
+    return true;
+  }
+
+  BPLOG(ERROR) << "Unsupported segment register: " << segment_name;
+  return false;
+}
+
+// AMD64 counterpart of the segment lookup: "ds" and "es" always yield an
+// address of 0, and `context` is not consulted. Any other segment name is
+// reported as unsupported and the function returns false without modifying
+// `address`.
+bool GetSegmentAddressAMD64(const DumpContext& context, string segment_name,
+                            uint64_t& address) {
+  const bool is_zero_based_segment =
+      segment_name == "ds" || segment_name == "es";
+  if (!is_zero_based_segment) {
+    BPLOG(ERROR) << "Unsupported segment register: " << segment_name;
+    return false;
+  }
+
+  address = 0;
+  return true;
+}
+
+// Dispatches the segment lookup to the x86 or AMD64 implementation depending
+// on the CPU type reported by `context`. Returns false (and logs) for any
+// other architecture.
+bool GetSegmentAddress(const DumpContext& context, string segment_name,
+                       uint64_t& address) {
+  if (context.GetContextCPU() == MD_CONTEXT_X86) {
+    return GetSegmentAddressX86(context, segment_name, address);
+  }
+  if (context.GetContextCPU() == MD_CONTEXT_AMD64) {
+    return GetSegmentAddressAMD64(context, segment_name, address);
+  }
+
+  BPLOG(ERROR) << "Unsupported architecture for GetSegmentAddress\n";
+  return false;
+}
+
+// Reads the 32-bit general purpose register (or eip) named `register_name`
+// from the x86 context into `value`.
+// Returns false, after logging an error, when the name is not one of the
+// supported registers; `value` is left untouched in that case.
+bool GetRegisterValueX86(const DumpContext& context, string register_name,
+                         uint64_t& value) {
+  if (register_name == "eax") {
+    value = context.GetContextX86()->eax;
+    return true;
+  }
+  if (register_name == "ebx") {
+    value = context.GetContextX86()->ebx;
+    return true;
+  }
+  if (register_name == "ecx") {
+    value = context.GetContextX86()->ecx;
+    return true;
+  }
+  if (register_name == "edx") {
+    value = context.GetContextX86()->edx;
+    return true;
+  }
+  if (register_name == "edi") {
+    value = context.GetContextX86()->edi;
+    return true;
+  }
+  if (register_name == "esi") {
+    value = context.GetContextX86()->esi;
+    return true;
+  }
+  if (register_name == "ebp") {
+    value = context.GetContextX86()->ebp;
+    return true;
+  }
+  if (register_name == "esp") {
+    value = context.GetContextX86()->esp;
+    return true;
+  }
+  if (register_name == "eip") {
+    value = context.GetContextX86()->eip;
+    return true;
+  }
+
+  BPLOG(ERROR) << "Unsupported register: " << register_name;
+  return false;
+}
+
+// Reads the 64-bit general purpose register (or rip) named `register_name`
+// from the AMD64 context into `value`.
+// Returns false, after logging an error, when the name is not one of the
+// supported registers; `value` is left untouched in that case.
+bool GetRegisterValueAMD64(const DumpContext& context, string register_name,
+                           uint64_t& value) {
+  if (register_name == "rax") {
+    value = context.GetContextAMD64()->rax;
+    return true;
+  }
+  if (register_name == "rbx") {
+    value = context.GetContextAMD64()->rbx;
+    return true;
+  }
+  if (register_name == "rcx") {
+    value = context.GetContextAMD64()->rcx;
+    return true;
+  }
+  if (register_name == "rdx") {
+    value = context.GetContextAMD64()->rdx;
+    return true;
+  }
+  if (register_name == "rdi") {
+    value = context.GetContextAMD64()->rdi;
+    return true;
+  }
+  if (register_name == "rsi") {
+    value = context.GetContextAMD64()->rsi;
+    return true;
+  }
+  if (register_name == "rbp") {
+    value = context.GetContextAMD64()->rbp;
+    return true;
+  }
+  if (register_name == "rsp") {
+    value = context.GetContextAMD64()->rsp;
+    return true;
+  }
+  if (register_name == "r8") {
+    value = context.GetContextAMD64()->r8;
+    return true;
+  }
+  if (register_name == "r9") {
+    value = context.GetContextAMD64()->r9;
+    return true;
+  }
+  if (register_name == "r10") {
+    value = context.GetContextAMD64()->r10;
+    return true;
+  }
+  if (register_name == "r11") {
+    value = context.GetContextAMD64()->r11;
+    return true;
+  }
+  if (register_name == "r12") {
+    value = context.GetContextAMD64()->r12;
+    return true;
+  }
+  if (register_name == "r13") {
+    value = context.GetContextAMD64()->r13;
+    return true;
+  }
+  if (register_name == "r14") {
+    value = context.GetContextAMD64()->r14;
+    return true;
+  }
+  if (register_name == "r15") {
+    value = context.GetContextAMD64()->r15;
+    return true;
+  }
+  if (register_name == "rip") {
+    value = context.GetContextAMD64()->rip;
+    return true;
+  }
+
+  BPLOG(ERROR) << "Unsupported register: " << register_name;
+  return false;
+}
+
+// Fetches the value of the register called `register_name` from `context`
+// and writes it to `value`; returns true on success.
+// Only full-width registers are handled (no sub-registers such as ax/al),
+// which is sufficient because this is only used to evaluate address
+// expressions. Architectures other than x86 and AMD64 are rejected.
+bool GetRegisterValue(const DumpContext& context, string register_name,
+                      uint64_t& value) {
+  if (context.GetContextCPU() == MD_CONTEXT_X86) {
+    return GetRegisterValueX86(context, register_name, value);
+  }
+  if (context.GetContextCPU() == MD_CONTEXT_AMD64) {
+    return GetRegisterValueAMD64(context, register_name, value);
+  }
+
+  BPLOG(ERROR) << "Unsupported architecture for GetRegisterValue\n";
+  return false;
+}
+} // namespace
+
+// static
+// Disassembles the first instruction contained in `raw_bytes` by running the
+// external `objdump` tool and parsing its output.
+//
+// cpu:           MD_CONTEXT_X86 or MD_CONTEXT_AMD64; anything else fails.
+// raw_bytes:     the machine code to disassemble.
+// raw_bytes_len: number of valid bytes in `raw_bytes`.
+// instruction:   receives the Intel-syntax text of the first instruction
+//                (e.g. "lock cmpxchg DWORD PTR [esi+0x10],eax"); always
+//                reset to the empty string first.
+//
+// Returns true only if objdump exited successfully and an instruction line
+// was found in its output.
+bool DisassemblerObjdump::DisassembleInstruction(uint32_t cpu,
+                                                 const uint8_t* raw_bytes,
+                                                 unsigned int raw_bytes_len,
+                                                 string& instruction) {
+  // Always initialize outputs
+  instruction = "";
+
+  if (!raw_bytes || raw_bytes_len == 0) {
+    // There's no need to perform any operation in this case, as there's
+    // clearly no instruction there.
+    return false;
+  }
+
+  string architecture;
+  if (cpu == MD_CONTEXT_X86) {
+    architecture = "i386";
+  } else if (cpu == MD_CONTEXT_AMD64) {
+    architecture = "i386:x86-64";
+  } else {
+    BPLOG(ERROR) << "Unsupported architecture.";
+    return false;
+  }
+
+  // Create a temporary file for the raw instruction bytes to pass to
+  // objdump, and write the bytes to the input file.
+  ScopedTmpFile raw_bytes_file;
+  if (!raw_bytes_file.InitData(raw_bytes, raw_bytes_len)) {
+    BPLOG(ERROR) << "Failed creating temporary file.";
+    return false;
+  }
+
+  // Create a pipe to use to read the disassembly back from objdump.
+  ScopedPipe disassembly_pipe;
+  if (!disassembly_pipe.Init()) {
+    BPLOG(ERROR) << "Failed creating pipe for output.";
+    return false;
+  }
+
+  pid_t child_pid = fork();
+  if (child_pid < 0) {
+    BPLOG(ERROR) << "Fork failed.";
+    return false;
+  }
+
+  if (child_pid == 0) {
+    // In the child process, set up the input and output file descriptors.
+    //
+    // Failure paths in the child use _exit() rather than exit(): the child
+    // is a copy of the parent, so exit() would run the parent's atexit
+    // handlers and static destructors and flush its inherited stdio buffers
+    // a second time.
+    if (dup2(raw_bytes_file.GetFd(), STDIN_FILENO) < 0 ||
+        disassembly_pipe.Dup2WriteFd(STDOUT_FILENO) < 0 ||
+        disassembly_pipe.Dup2WriteFd(STDERR_FILENO) < 0) {
+      BPLOG(ERROR) << "Failed dup'ing file descriptors.";
+      _exit(-1);
+    }
+
+    // We need to close the read end of the pipe in the child process so that
+    // when the parent closes it, the pipe is disconnected.
+    disassembly_pipe.CloseReadFd();
+
+    // We use "/proc/self/fd/0" here to allow objdump to parse an unnamed file,
+    // since objdump does not have a mode to read from stdin. This cannot be
+    // used with a pipe, since objdump requires that the input is a standard
+    // file.
+    execlp("objdump", "objdump", "-D", "--no-show-raw-insn", "-b", "binary",
+           "-M", "intel", "-m", architecture.c_str(), "/proc/self/fd/0",
+           nullptr);
+
+    // execlp only returns on failure.
+    BPLOG(ERROR) << "Failed to exec objdump.";
+    _exit(-1);
+  } else {
+    // In the parent process, parse the objdump output.
+
+    // Match the instruction line, from:
+    //    0:        lock cmpxchg DWORD PTR [esi+0x10],eax
+    // extract the string "lock cmpxchg DWORD PTR [esi+0x10],eax"
+    std::regex instruction_regex(
+        "^\\s+[0-9a-f]+:\\s+"  // "   0:"
+        "((?:\\s*\\S*)+)$");   // "lock cmpxchg..."
+
+    std::string line;
+    std::smatch match;
+    while (disassembly_pipe.ReadLine(line)) {
+      if (std::regex_match(line, match, instruction_regex)) {
+        // Only the first instruction is of interest.
+        instruction = match[1].str();
+        break;
+      }
+    }
+
+    // Close the read pipe so that objdump will exit (in case we broke out of
+    // the loop above before reading all of the output).
+    disassembly_pipe.CloseReadFd();
+
+    // Now wait for objdump to exit.
+    int status = 0;
+    HANDLE_EINTR(waitpid(child_pid, &status, 0));
+
+    if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+      BPLOG(ERROR) << "objdump didn't run successfully.";
+      return false;
+    }
+
+    if (instruction.empty()) {
+      BPLOG(ERROR) << "Failed to find instruction in objdump output.";
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// static
+// Splits an Intel-syntax instruction string (as produced by objdump) into its
+// operation and up to two operands.
+//
+// Instruction prefixes (see IsInstructionPrefix) before the operation and
+// operand-size keywords (see IsOperandSize) before an operand are skipped.
+// `operation`, `dest` and `src` are cleared first and then filled in as
+// tokens are consumed, so they may be partially populated when the function
+// returns false.
+//
+// Returns false if something other than a comma follows the first operand, if
+// a comma follows the second operand, or if a comma is not followed by a
+// second operand.
+bool DisassemblerObjdump::TokenizeInstruction(const string& instruction,
+                                              string& operation, string& dest,
+                                              string& src) {
+  // Always initialize outputs.
+  operation = "";
+  dest = "";
+  src = "";
+
+  // A token is either a run of characters containing no whitespace or comma,
+  // or a single comma; whitespace trailing a token is swallowed.
+  const std::regex token_pattern("((?:[^\\s,]+)|,)(?:\\s)*");
+  const std::sregex_iterator tokens_end;
+
+  bool seen_separator = false;
+  for (std::sregex_iterator it(instruction.begin(), instruction.end(),
+                               token_pattern);
+       it != tokens_end; ++it) {
+    const string token = (*it)[1].str();
+
+    // Still looking for the operation: skip over any prefixes.
+    if (operation.empty()) {
+      if (!IsInstructionPrefix(token)) {
+        operation = token;
+      }
+      continue;
+    }
+
+    // Still looking for the first operand: skip over size keywords.
+    if (dest.empty()) {
+      if (!IsOperandSize(token)) {
+        dest = token;
+      }
+      continue;
+    }
+
+    // The only thing allowed directly after the first operand is a comma.
+    if (!seen_separator) {
+      if (token != ",") {
+        BPLOG(ERROR) << "Failed to parse operands from objdump output, expected"
+                        " comma but found \""
+                     << token << "\"";
+        return false;
+      }
+      seen_separator = true;
+      continue;
+    }
+
+    // Still looking for the second operand: skip over size keywords.
+    if (src.empty()) {
+      if (!IsOperandSize(token)) {
+        src = token;
+      }
+      continue;
+    }
+
+    // Everything has been found. Trailing junk is ignored, except for a
+    // comma, which suggests we are still in the middle of the operand list
+    // and have mis-parsed something.
+    if (token == ",") {
+      BPLOG(ERROR) << "Failed to parse operands from objdump output, found "
+                      "unexpected comma after last operand.";
+      return false;
+    }
+  }
+
+  if (seen_separator && src.empty()) {
+    BPLOG(ERROR) << "Failed to parse operands from objdump output, found comma "
+                    "but no src operand.";
+    return false;
+  }
+
+  return true;
+}
+
+// static
+// Evaluates an Intel-syntax memory operand of the form
+//   [segment:] "[" base [ "+" index "*" stride ] [ ("+"|"-") 0xoffset ] "]"
+// against the register values held in `context`, and stores
+//   segment + base + index * stride +/- offset
+// in `address`.
+//
+// `address` is set to 0 up front and stays 0 on failure. Returns false if
+// `expression` does not have the shape above or if any segment/register name
+// in it cannot be resolved.
+bool DisassemblerObjdump::CalculateAddress(const DumpContext& context,
+                                           const string& expression,
+                                           uint64_t& address) {
+  address = 0;
+
+  // Capture groups, using "fs:[esi+edi*4+0x80]" as the example:
+  //   1 = "fs", 2 = "esi", 3 = "edi", 4 = "4", 5 = "+", 6 = "0x80"
+  // Groups belonging to an absent optional part come back empty.
+  const std::regex operand_pattern(
+      "^(?:(\\ws):)?"                // "fs:"
+      "\\[(\\w+)"                    // "[esi"
+      "(?:\\+(\\w+)(?:\\*(\\d+)))?"  // "+edi*4"
+      "(?:([\\+-])(0x[0-9a-f]+))?"   // "+0x80"
+      "\\]$");                       // "]"
+
+  std::smatch parts;
+  const bool parsed = std::regex_match(expression, parts, operand_pattern);
+  if (!parsed || parts.size() != 7) {
+    return false;
+  }
+
+  const string segment = parts[1].str();
+  const string base = parts[2].str();
+  const string index = parts[3].str();
+  const string stride = parts[4].str();
+  const string sign = parts[5].str();
+  const string displacement = parts[6].str();
+
+  // The segment and index are optional; the base register is mandatory.
+  uint64_t segment_value = 0;
+  if (!segment.empty() &&
+      !GetSegmentAddress(context, segment, segment_value)) {
+    return false;
+  }
+
+  uint64_t base_value = 0;
+  if (!GetRegisterValue(context, base, base_value)) {
+    return false;
+  }
+
+  uint64_t index_value = 0;
+  if (!index.empty() && !GetRegisterValue(context, index, index_value)) {
+    return false;
+  }
+
+  // Absent stride means "times one"; absent offset means zero.
+  const uint64_t stride_value =
+      stride.empty() ? 1 : strtoull(stride.c_str(), nullptr, 0);
+  const uint64_t displacement_value =
+      displacement.empty() ? 0 : strtoull(displacement.c_str(), nullptr, 0);
+
+  uint64_t result = segment_value + base_value + (index_value * stride_value);
+  if (sign == "+") {
+    result += displacement_value;
+  } else if (sign == "-") {
+    result -= displacement_value;
+  }
+
+  address = result;
+  return true;
+}
+
+// Disassembles the instruction located at `address` within `memory_region`
+// for the given `cpu` and records its operation and operands in operation_,
+// dest_ and src_.
+//
+// Construction never fails visibly: if `memory_region` is null, `address`
+// lies outside the region, no bytes can be read, or disassembly fails, the
+// constructor simply returns early and the members keep whatever they held
+// at that point.
+DisassemblerObjdump::DisassemblerObjdump(const uint32_t cpu,
+                                         const MemoryRegion* memory_region,
+                                         uint64_t address) {
+  if (!memory_region) {
+    return;
+  }
+
+  // Range check written as a subtraction so that a region ending at the very
+  // top of the address space cannot make base + size wrap around.
+  if (address < memory_region->GetBase() ||
+      address - memory_region->GetBase() >= memory_region->GetSize()) {
+    return;
+  }
+
+  uint8_t ip_bytes[kMaxX86InstructionLength] = {0};
+  size_t ip_bytes_length;
+  for (ip_bytes_length = 0; ip_bytes_length < kMaxX86InstructionLength;
+       ++ip_bytes_length) {
+    // We have to read byte-by-byte here, since we still want to try and
+    // disassemble an instruction even if we don't have enough bytes.
+    if (!memory_region->GetMemoryAtAddress(address + ip_bytes_length,
+                                           &ip_bytes[ip_bytes_length])) {
+      break;
+    }
+  }
+
+  // Only hand over the bytes that were actually read. Passing the full
+  // buffer would make objdump decode the zero padding as if it were part of
+  // the instruction stream (or decode pure padding when nothing was read).
+  string instruction;
+  if (!DisassembleInstruction(cpu, ip_bytes,
+                              static_cast<unsigned int>(ip_bytes_length),
+                              instruction)) {
+    return;
+  }
+
+  if (!TokenizeInstruction(instruction, operation_, dest_, src_)) {
+    return;
+  }
+}
+
+// Evaluates the source operand (src_) as an address expression against
+// `context` by forwarding to CalculateAddress. Returns its result; on
+// success `address` holds the computed address.
+bool DisassemblerObjdump::CalculateSrcAddress(const DumpContext& context,
+ uint64_t& address) {
+ return CalculateAddress(context, src_, address);
+}
+
+// Evaluates the destination operand (dest_) as an address expression against
+// `context` by forwarding to CalculateAddress. Returns its result; on
+// success `address` holds the computed address.
+bool DisassemblerObjdump::CalculateDestAddress(const DumpContext& context,
+ uint64_t& address) {
+ return CalculateAddress(context, dest_, address);
+}
+
+} // namespace google_breakpad \ No newline at end of file