// -*- mode: c++ -*-

// Copyright 2010 Google LLC
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google LLC nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>

// module.h: Define google_breakpad::Module. A Module holds debugging
// information, and can write that information out as a Breakpad
// symbol file.

#ifndef COMMON_LINUX_MODULE_H__
#define COMMON_LINUX_MODULE_H__

#include <functional>
#include <iostream>
#include <limits>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>

#include "common/string_view.h"
#include "common/symbol_data.h"
#include "common/unordered.h"
#include "common/using_std_string.h"
#include "google_breakpad/common/breakpad_types.h"

namespace google_breakpad {

using std::set;
using std::vector;
using std::map;

// A Module represents the contents of a module, and supports methods
// for adding information produced by parsing STABS or DWARF data
// --- possibly both from the same file --- and then writing out the
// unified contents as a Breakpad-format symbol file.
class Module {
 public:
  // The type of addresses and sizes in a symbol table.
  typedef uint64_t Address;
  static constexpr uint64_t kMaxAddress = std::numeric_limits<Address>::max();
  struct File;
  struct Function;
  struct InlineOrigin;
  struct Inline;
  struct Line;
  struct Extern;

  // Addresses appearing in File, Function, and Line structures are
  // absolute, not relative to the the module's load address.  That
  // is, if the module were loaded at its nominal load address, the
  // addresses would be correct.

  // A source file.
  struct File {
    explicit File(const string& name_input) : name(name_input), source_id(0) {}

    // The name of the source file.
    const string name;

    // The file's source id.  The Write member function clears this
    // field and assigns source ids a fresh, so any value placed here
    // before calling Write will be lost.
    int source_id;
  };

  // An address range.
  struct Range {
    Range(const Address address_input, const Address size_input) :
        address(address_input), size(size_input) { }

    Address address;
    Address size;
  };

  // A function.
  struct Function {
    Function(StringView name_input, const Address& address_input) :
        name(name_input), address(address_input), parameter_size(0) {}

    // For sorting by address.  (Not style-guide compliant, but it's
    // stupid not to put this in the struct.)
    static bool CompareByAddress(const Function* x, const Function* y) {
      return x->address < y->address;
    }

    // The function's name.
    StringView name;

    // The start address and the address ranges covered by the function.
    const Address address;
    vector<Range> ranges;

    // The function's parameter size.
    Address parameter_size;

    // Source lines belonging to this function, sorted by increasing
    // address.
    vector<Line> lines;

    // Inlined call sites belonging to this functions.
    vector<std::unique_ptr<Inline>> inlines;

    // If this symbol has been folded with other symbols in the linked binary.
    bool is_multiple = false;

    // If the function's name should be filled out from a matching Extern,
    // should they not match.
    bool prefer_extern_name = false;
  };

  struct InlineOrigin {
    explicit InlineOrigin(StringView name) : id(-1), name(name) {}

    // A unique id for each InlineOrigin object. INLINE records use the id to
    // refer to its INLINE_ORIGIN record.
    int id;

    // The inlined function's name.
    StringView name;
  };

  // A inlined call site.
  struct Inline {
    Inline(InlineOrigin* origin,
           const vector<Range>& ranges,
           int call_site_line,
           int call_site_file_id,
           int inline_nest_level,
           vector<std::unique_ptr<Inline>> child_inlines)
        : origin(origin),
          ranges(ranges),
          call_site_line(call_site_line),
          call_site_file_id(call_site_file_id),
          call_site_file(nullptr),
          inline_nest_level(inline_nest_level),
          child_inlines(std::move(child_inlines)) {}

    InlineOrigin* origin;

    // The list of addresses and sizes.
    vector<Range> ranges;

    int call_site_line;

    // The id is only meanful inside a CU. It's only used for looking up real
    // File* after scanning a CU.
    int call_site_file_id;

    File* call_site_file;

    int inline_nest_level;

    // A list of inlines which are children of this inline.
    vector<std::unique_ptr<Inline>> child_inlines;

    int getCallSiteFileID() const {
      return call_site_file ? call_site_file->source_id : -1;
    }

    static void InlineDFS(
        vector<std::unique_ptr<Module::Inline>>& inlines,
        std::function<void(std::unique_ptr<Module::Inline>&)> const& forEach) {
      for (std::unique_ptr<Module::Inline>& in : inlines) {
        forEach(in);
        InlineDFS(in->child_inlines, forEach);
      }
    }
  };

  typedef map<uint64_t, InlineOrigin*> InlineOriginByOffset;

  class InlineOriginMap {
   public:
    // Add INLINE ORIGIN to the module. Return a pointer to origin .
    InlineOrigin* GetOrCreateInlineOrigin(uint64_t offset, StringView name);

    // offset is the offset of a DW_TAG_subprogram. specification_offset is the
    // value of its DW_AT_specification or equals to offset if
    // DW_AT_specification doesn't exist in that DIE.
    void SetReference(uint64_t offset, uint64_t specification_offset);

    ~InlineOriginMap() {
      for (const auto& iter : inline_origins_) {
        delete iter.second;
      }
    }

   private:
    // A map from a DW_TAG_subprogram's offset to the DW_TAG_subprogram.
    InlineOriginByOffset inline_origins_;

    // A map from a DW_TAG_subprogram's offset to the offset of its
    // specification or abstract origin subprogram. The set of values in this
    // map should always be the same set of keys in inline_origins_.
    map<uint64_t, uint64_t> references_;
  };

  map<std::string, InlineOriginMap> inline_origin_maps;

  // A source line.
  struct Line {
    // For sorting by address.  (Not style-guide compliant, but it's
    // stupid not to put this in the struct.)
    static bool CompareByAddress(const Module::Line& x, const Module::Line& y) {
      return x.address < y.address;
    }

    Address address, size;    // The address and size of the line's code.
    File* file;                // The source file.
    int number;                // The source line number.
  };

  // An exported symbol.
  struct Extern {
    explicit Extern(const Address& address_input) : address(address_input) {}
    const Address address;
    string name;
    // If this symbol has been folded with other symbols in the linked binary.
    bool is_multiple = false;
  };

  // A map from register names to postfix expressions that recover
  // their their values. This can represent a complete set of rules to
  // follow at some address, or a set of changes to be applied to an
  // extant set of rules.
  typedef map<string, string> RuleMap;

  // A map from addresses to RuleMaps, representing changes that take
  // effect at given addresses.
  typedef map<Address, RuleMap> RuleChangeMap;

  // A range of 'STACK CFI' stack walking information. An instance of
  // this structure corresponds to a 'STACK CFI INIT' record and the
  // subsequent 'STACK CFI' records that fall within its range.
  struct StackFrameEntry {
    // The starting address and number of bytes of machine code this
    // entry covers.
    Address address, size;

    // The initial register recovery rules, in force at the starting
    // address.
    RuleMap initial_rules;

    // A map from addresses to rule changes. To find the rules in
    // force at a given address, start with initial_rules, and then
    // apply the changes given in this map for all addresses up to and
    // including the address you're interested in.
    RuleChangeMap rule_changes;
  };

  struct FunctionCompare {
    bool operator() (const Function* lhs, const Function* rhs) const {
      if (lhs->address == rhs->address)
        return lhs->name < rhs->name;
      return lhs->address < rhs->address;
    }
  };

  struct InlineOriginCompare {
    bool operator()(const InlineOrigin* lhs, const InlineOrigin* rhs) const {
      return lhs->name < rhs->name;
    }
  };

  struct ExternCompare {
    // Defining is_transparent allows
    // std::set<std::unique_ptr<Extern>, ExternCompare>::find() to be called
    // with an Extern* and have set use the overloads below.
    using is_transparent = void;
    bool operator() (const std::unique_ptr<Extern>& lhs,
                     const std::unique_ptr<Extern>& rhs) const {
      return lhs->address < rhs->address;
    }
    bool operator() (const Extern* lhs, const std::unique_ptr<Extern>& rhs) const {
      return lhs->address < rhs->address;
    }
    bool operator() (const std::unique_ptr<Extern>& lhs, const Extern* rhs) const {
      return lhs->address < rhs->address;
    }
  };

  // Create a new module with the given name, operating system,
  // architecture, and ID string.
  // NB: `enable_multiple_field` is temporary while transitioning to enabling
  // writing the multiple field permanently.
  Module(const string& name,
         const string& os,
         const string& architecture,
         const string& id,
         const string& code_id = "",
         bool enable_multiple_field = false,
         bool prefer_extern_name = false);
  ~Module();

  // Set the module's load address to LOAD_ADDRESS; addresses given
  // for functions and lines will be written to the Breakpad symbol
  // file as offsets from this address.  Construction initializes this
  // module's load address to zero: addresses written to the symbol
  // file will be the same as they appear in the Function, Line, and
  // StackFrameEntry structures.
  //
  // Note that this member function has no effect on addresses stored
  // in the data added to this module; the Write member function
  // simply subtracts off the load address from addresses before it
  // prints them. Only the last load address given before calling
  // Write is used.
  void SetLoadAddress(Address load_address);

  // Sets address filtering on elements added to the module.  This allows
  // libraries with extraneous debug symbols to generate symbol files containing
  // only relevant symbols.  For example, an LLD-generated partition library may
  // contain debug information pertaining to all partitions derived from a
  // single "combined" library.  Filtering applies only to elements added after
  // this method is called.
  void SetAddressRanges(const vector<Range>& ranges);

  // Add FUNCTION to the module. FUNCTION's name must not be empty.
  // This module owns all Function objects added with this function:
  // destroying the module destroys them as well.
  // Return false if the function is duplicate and needs to be freed.
  bool AddFunction(Function* function);

  // Add STACK_FRAME_ENTRY to the module.
  // This module owns all StackFrameEntry objects added with this
  // function: destroying the module destroys them as well.
  void AddStackFrameEntry(std::unique_ptr<StackFrameEntry> stack_frame_entry);

  // Add PUBLIC to the module.
  // This module owns all Extern objects added with this function:
  // destroying the module destroys them as well.
  void AddExtern(std::unique_ptr<Extern> ext);

  // If this module has a file named NAME, return a pointer to it. If
  // it has none, then create one and return a pointer to the new
  // file. This module owns all File objects created using these
  // functions; destroying the module destroys them as well.
  File* FindFile(const string& name);
  File* FindFile(const char* name);

  // If this module has a file named NAME, return a pointer to it.
  // Otherwise, return NULL.
  File* FindExistingFile(const string& name);

  // Insert pointers to the functions added to this module at I in
  // VEC. The pointed-to Functions are still owned by this module.
  // (Since this is effectively a copy of the function list, this is
  // mostly useful for testing; other uses should probably get a more
  // appropriate interface.)
  void GetFunctions(vector<Function*>* vec, vector<Function*>::iterator i);

  // Insert pointers to the externs added to this module at I in
  // VEC. The pointed-to Externs are still owned by this module.
  // (Since this is effectively a copy of the extern list, this is
  // mostly useful for testing; other uses should probably get a more
  // appropriate interface.)
  void GetExterns(vector<Extern*>* vec, vector<Extern*>::iterator i);

  // Clear VEC and fill it with pointers to the Files added to this
  // module, sorted by name. The pointed-to Files are still owned by
  // this module. (Since this is effectively a copy of the file list,
  // this is mostly useful for testing; other uses should probably get
  // a more appropriate interface.)
  void GetFiles(vector<File*>* vec);

  // Clear VEC and fill it with pointers to the StackFrameEntry
  // objects that have been added to this module. (Since this is
  // effectively a copy of the stack frame entry list, this is mostly
  // useful for testing; other uses should probably get
  // a more appropriate interface.)
  void GetStackFrameEntries(vector<StackFrameEntry*>* vec) const;

  // Find those files in this module that are actually referred to by
  // functions' line number data, and assign them source id numbers.
  // Set the source id numbers for all other files --- unused by the
  // source line data --- to -1.  We do this before writing out the
  // symbol file, at which point we omit any unused files.
  void AssignSourceIds();

  // This function should be called before AssignSourceIds() to get the set of
  // valid InlineOrigins*.
  void CreateInlineOrigins(
      set<InlineOrigin*, InlineOriginCompare>& inline_origins);

  // Call AssignSourceIds, and write this module to STREAM in the
  // breakpad symbol format. Return true if all goes well, or false if
  // an error occurs. This method writes out:
  // - a header based on the values given to the constructor,
  // If symbol_data is not CFI then:
  // - the source files added via FindFile,
  // - the functions added via AddFunctions, each with its lines,
  // - all public records,
  // If symbol_data is CFI then:
  // - all CFI records.
  // Addresses in the output are all relative to the load address
  // established by SetLoadAddress.
  bool Write(std::ostream& stream, SymbolData symbol_data);

  // Place the name in the global set of strings. Return a StringView points to
  // a string inside the pool.
  StringView AddStringToPool(const string& str) {
    auto result = common_strings_.insert(str);
    return *(result.first);
  }

  string name() const { return name_; }
  string os() const { return os_; }
  string architecture() const { return architecture_; }
  string identifier() const { return id_; }
  string code_identifier() const { return code_id_; }

 private:
  // Report an error that has occurred writing the symbol file, using
  // errno to find the appropriate cause.  Return false.
  static bool ReportError();

  // Write RULE_MAP to STREAM, in the form appropriate for 'STACK CFI'
  // records, without a final newline. Return true if all goes well;
  // if an error occurs, return false, and leave errno set.
  static bool WriteRuleMap(const RuleMap& rule_map, std::ostream& stream);

  // Returns true of the specified address resides with an specified address
  // range, or if no ranges have been specified.
  bool AddressIsInModule(Address address) const;

  // Module header entries.
  string name_, os_, architecture_, id_, code_id_;

  // The module's nominal load address.  Addresses for functions and
  // lines are absolute, assuming the module is loaded at this
  // address.
  Address load_address_;

  // The set of valid address ranges of the module.  If specified, attempts to
  // add elements residing outside these ranges will be silently filtered.
  vector<Range> address_ranges_;

  // Relation for maps whose keys are strings shared with some other
  // structure.
  struct CompareStringPtrs {
    bool operator()(const string* x, const string* y) const { return *x < *y; }
  };

  // A map from filenames to File structures.  The map's keys are
  // pointers to the Files' names.
  typedef map<const string*, File*, CompareStringPtrs> FileByNameMap;

  // A set containing Function structures, sorted by address.
  typedef set<Function*, FunctionCompare> FunctionSet;

  // A set containing Extern structures, sorted by address.
  typedef set<std::unique_ptr<Extern>, ExternCompare> ExternSet;

  // The module owns all the files and functions that have been added
  // to it; destroying the module frees the Files and Functions these
  // point to.
  FileByNameMap files_;    // This module's source files.
  FunctionSet functions_;  // This module's functions.
  // Used to quickly look up whether a function exists at a particular address.
  unordered_set<Address> function_addresses_;

  // The module owns all the call frame info entries that have been
  // added to it.
  vector<std::unique_ptr<StackFrameEntry>> stack_frame_entries_;

  // The module owns all the externs that have been added to it;
  // destroying the module frees the Externs these point to.
  ExternSet externs_;

  unordered_set<string> common_strings_;

  // Whether symbols sharing an address should be collapsed into a single entry
  // and marked with an `m` in the output. See
  // https://bugs.chromium.org/p/google-breakpad/issues/detail?id=751 and docs
  // at
  // https://chromium.googlesource.com/breakpad/breakpad/+/master/docs/symbol_files.md#records-3
  bool enable_multiple_field_;

  // If a Function and an Extern share the same address but have a different
  // name, prefer the name of the Extern.
  //
  // Use this when dumping Mach-O .dSYMs built with -gmlt (Minimum Line Tables),
  // as the Function's fully-qualified name will only be present in the STABS
  // (which are placed in the Extern), not in the DWARF symbols (which are
  // placed in the Function).
  bool prefer_extern_name_;
};

}  // namespace google_breakpad

#endif  // COMMON_LINUX_MODULE_H__