// Copyright 2006 Google LLC
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google LLC nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// PDBSourceLineWriter uses a pdb file produced by Visual C++ to output
// a line/address map for use with BasicSourceLineResolver.

#ifndef COMMON_WINDOWS_PDB_SOURCE_LINE_WRITER_H_
#define COMMON_WINDOWS_PDB_SOURCE_LINE_WRITER_H_

#include <atlcomcli.h>

#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "common/windows/module_info.h"
#include "common/windows/omap.h"

struct IDiaEnumLineNumbers;
struct IDiaSession;
struct IDiaSymbol;

namespace google_breakpad {

using std::map;
using std::vector;
using std::wstring;
using std::unordered_map;

class PDBSourceLineWriter {
 public:
  enum FileFormat {
    PDB_FILE,  // a .pdb file containing debug symbols
    EXE_FILE,  // a .exe or .dll file
    ANY_FILE   // try PDB_FILE and then EXE_FILE
  };

  explicit PDBSourceLineWriter(bool handle_inline = false);
  ~PDBSourceLineWriter();

  // Opens the given file.  For executable files, the corresponding pdb
  // file must be available; Open will be if it is not.
  // If there is already a pdb file open, it is automatically closed.
  // Returns true on success.
  bool Open(const wstring& file, FileFormat format);

  // Closes the current pdb file and its associated resources.
  void Close();

  // Sets the code file full path.  This is optional for 32-bit modules.  It is
  // also optional for 64-bit modules when there is an executable file stored
  // in the same directory as the PDB file.  It is only required for 64-bit
  // modules when the executable file is not in the same location as the PDB
  // file and it must be called after Open() and before WriteMap().
  // If Open() was called for an executable file, then it is an error to call
  // SetCodeFile() with a different file path and it will return false.
  bool SetCodeFile(const wstring& exe_file);

  // Writes a Breakpad symbol file from the current pdb file to |symbol_file|.
  // Returns true on success.
  bool WriteSymbols(FILE *symbol_file);

  // Retrieves information about the module's debugging file.  Returns
  // true on success and false on failure.
  bool GetModuleInfo(PDBModuleInfo *info);

  // Retrieves information about the module's PE file.  Returns
  // true on success and false on failure.
  bool GetPEInfo(PEModuleInfo *info);

  // Sets uses_guid to true if the opened file uses a new-style CodeView
  // record with a 128-bit GUID, or false if the opened file uses an old-style
  // CodeView record.  When no GUID is available, a 32-bit signature should be
  // used to identify the module instead.  If the information cannot be
  // determined, this method returns false.
  bool UsesGUID(bool *uses_guid);

 private:
  // InlineOrigin represents INLINE_ORIGIN record in a symbol file. It's an
  // inlined function.
  struct InlineOrigin {
    // The unique id for an InlineOrigin.
    int id;
    // The name of the inlined function.
    wstring name;
  };

  // Line represents LINE record in a symbol file. It represents a source code
  // line.
  struct Line {
    // The relative address of a line.
    DWORD rva;
    // The number bytes this line has.
    DWORD length;
    // The source line number.
    DWORD line_num;
    // The source file id where the source line is located at.
    DWORD file_id;
  };

  // Inline represents INLINE record in a symbol file.
  class Inline {
   public:
    explicit Inline(int inline_nest_level);

    void SetOriginId(int origin_id);

    // Adding inlinee line's range into ranges. If line is adjacent with any
    // existing lines, extend the range. Otherwise, add line as a new range.
    void ExtendRanges(const Line& line);

    void SetCallSiteLine(DWORD call_site_line);

    void SetCallSiteFileId(DWORD call_site_file_id);

    void SetChildInlines(std::vector<std::unique_ptr<Inline>> child_inlines);

    void Print(FILE* output) const;

   private:
    // The nest level of this inline record.
    int inline_nest_level_;
    // The source line number at where this inlined function is called.
    DWORD call_site_line_ = 0;
    // The call site file id at where this inlined function is called.
    DWORD call_site_file_id_ = 0;
    // The id used for referring to an InlineOrigin.
    int origin_id_ = 0;
    // A map from rva to length. This is the address ranges covered by this
    // Inline.
    map<DWORD, DWORD> ranges_;
    // The list of direct Inlines inlined inside this Inline.
    vector<std::unique_ptr<Inline>> child_inlines_;
  };

  // Lines represents a map of lines inside a function with rva as the key.
  // AddLine function adds a line into the map and ensures that there is no
  // overlap between any two lines in the map.
  class Lines {
   public:
    const map<DWORD, Line>& GetLineMap() const { return line_map_; }

    // Finds the line from line_map_ that contains the given rva returns its
    // line_num. If not found, return 0.
    DWORD GetLineNum(DWORD rva) const;

    // Finds the line from line_map_ that contains the given rva returns its
    // file_id. If not found, return 0.
    DWORD GetFileId(DWORD rva) const;

    // Add the `line` into line_map_. If the `line` overlaps with existing
    // lines, truncate the existing lines and add the given line. It ensures
    // that all lines in line_map_ do not overlap with each other. For example,
    // suppose there is a line A in the map and we call AddLine with Line B.
    // Line A: rva: 100, length: 20, line_num: 10, file_id: 1
    // Line B: rva: 105, length: 10, line_num: 4, file_id: 2
    // After calling AddLine with Line B, we will have the following lines:
    // Line 1: rva: 100, length: 5, line_num: 10, file_id: 1
    // Line 2: rva: 105, length: 10, line_num: 4, file_id: 2
    // Line 3: rva: 115, length: 5, line_num: 10, file_id: 1
    void AddLine(const Line& line);

   private:
    // Finds the line from line_map_ that contains the given rva. If not found,
    // return nullptr.
    const Line* GetLine(DWORD rva) const;
    // The key is rva. AddLine function ensures that any two lines in the map do
    // not overlap.
    map<DWORD, Line> line_map_;
  };

  // Construct Line from IDiaLineNumber. The output Line is stored at line.
  // Return true on success.
  bool GetLine(IDiaLineNumber* dia_line, Line* line) const;

  // Construct Lines from IDiaEnumLineNumbers. The list of Lines are stored at
  // line_list.
  // Returns true on success.
  bool GetLines(IDiaEnumLineNumbers* lines, Lines* line_list) const;

  // Outputs the line/address pairs for each line in the enumerator.
  void PrintLines(const Lines& lines) const;

  // Outputs a function address and name, followed by its source line list.
  // block can be the same object as function, or it can be a reference to a
  // code block that is lexically part of this function, but resides at a
  // separate address. If has_multiple_symbols is true, this function's
  // instructions correspond to multiple symbols. Returns true on success.
  bool PrintFunction(IDiaSymbol *function, IDiaSymbol *block,
                     bool has_multiple_symbols);

  // Outputs all functions as described above.  Returns true on success.
  bool PrintFunctions();

  // Outputs all of the source files in the session's pdb file.
  // Returns true on success.
  bool PrintSourceFiles();

  // Output all inline origins.
  void PrintInlineOrigins() const;

  // Retrieve inlines inside the given block. It also adds inlinee lines to
  // `line_list` since inner lines are more precise source location. If the
  // block has children wih SymTagInlineSite Tag, it will recursively (DFS) call
  // itself with each child as first argument. Returns true on success.
  // `block`: the IDiaSymbol that may have inline sites.
  // `line_list`: the list of lines inside current function.
  // `inline_nest_level`: the nest level of block's Inlines.
  // `inlines`: the vector to store the list of inlines for the block.
  bool GetInlines(IDiaSymbol* block,
                  Lines* line_list,
                  int inline_nest_level,
                  vector<std::unique_ptr<Inline>>* inlines);

  // Outputs all inlines.
  void PrintInlines(const vector<std::unique_ptr<Inline>>& inlines) const;

  // Outputs all of the frame information necessary to construct stack
  // backtraces in the absence of frame pointers. For x86 data stored in
  // .pdb files. Returns true on success.
  bool PrintFrameDataUsingPDB();

  // Outputs all of the frame information necessary to construct stack
  // backtraces in the absence of frame pointers. For x64 data stored in
  // .exe, .dll files. Returns true on success.
  bool PrintFrameDataUsingEXE();

  // Outputs all of the frame information necessary to construct stack
  // backtraces in the absence of frame pointers.  Returns true on success.
  bool PrintFrameData();

  // Outputs a single public symbol address and name, if the symbol corresponds
  // to a code address.  Returns true on success.  If symbol is does not
  // correspond to code, returns true without outputting anything. If
  // has_multiple_symbols is true, the symbol corresponds to a code address and
  // the instructions correspond to multiple symbols.
  bool PrintCodePublicSymbol(IDiaSymbol *symbol, bool has_multiple_symbols);

  // Outputs a line identifying the PDB file that is being dumped, along with
  // its uuid and age.
  bool PrintPDBInfo();

  // Outputs a line identifying the PE file corresponding to the PDB
  // file that is being dumped, along with its code identifier,
  // which consists of its timestamp and file size.
  bool PrintPEInfo();

  // Returns true if this filename has already been seen,
  // and an ID is stored for it, or false if it has not.
  bool FileIDIsCached(const wstring& file) {
    return unique_files_.find(file) != unique_files_.end();
  }

  // Cache this filename and ID for later reuse.
  void CacheFileID(const wstring& file, DWORD id) {
    unique_files_[file] = id;
  }

  // Store this ID in the cache as a duplicate for this filename.
  void StoreDuplicateFileID(const wstring& file, DWORD id) {
    unordered_map<wstring, DWORD>::iterator iter = unique_files_.find(file);
    if (iter != unique_files_.end()) {
      // map this id to the previously seen one
      file_ids_[id] = iter->second;
    }
  }

  // Given a file's unique ID, return the ID that should be used to
  // reference it. There may be multiple files with identical filenames
  // but different unique IDs. The cache attempts to coalesce these into
  // one ID per unique filename.
  DWORD GetRealFileID(DWORD id) const {
    unordered_map<DWORD, DWORD>::const_iterator iter = file_ids_.find(id);
    if (iter == file_ids_.end())
      return id;
    return iter->second;
  }

  // Find the PE file corresponding to the loaded PDB file, and
  // set the code_file_ member. Returns false on failure.
  bool FindPEFile();

  // Returns the function name for a symbol.  If possible, the name is
  // undecorated.  If the symbol's decorated form indicates the size of
  // parameters on the stack, this information is returned in stack_param_size.
  // Returns true on success.  If the symbol doesn't encode parameter size
  // information, stack_param_size is set to -1.
  static bool GetSymbolFunctionName(IDiaSymbol *function, BSTR *name,
                                    int *stack_param_size);

  // Returns the number of bytes of stack space used for a function's
  // parameters.  function must have the tag SymTagFunction.  In the event of
  // a failure, returns 0, which is also a valid number of bytes.
  static int GetFunctionStackParamSize(IDiaSymbol *function);

  // The filename of the PE file corresponding to the currently-open
  // pdb file.
  wstring code_file_;

  // The session for the currently-open pdb file.
  CComPtr<IDiaSession> session_;

  // The current output file for this WriteMap invocation.
  FILE *output_;

  // There may be many duplicate filenames with different IDs.
  // This maps from the DIA "unique ID" to a single ID per unique
  // filename.
  unordered_map<DWORD, DWORD> file_ids_;
  // This maps unique filenames to file IDs.
  unordered_map<wstring, DWORD> unique_files_;

  // The INLINE_ORIGINS records. The key is the function name.
  std::map<wstring, InlineOrigin> inline_origins_;

  // This is used for calculating post-transform symbol addresses and lengths.
  ImageMap image_map_;

  // If we should output INLINE/INLINE_ORIGIN records
  bool handle_inline_;

  // Disallow copy ctor and operator=
  PDBSourceLineWriter(const PDBSourceLineWriter&);
  void operator=(const PDBSourceLineWriter&);
};

}  // namespace google_breakpad

#endif  // COMMON_WINDOWS_PDB_SOURCE_LINE_WRITER_H_