blob: 10037cd66ef129756d79ac57681e7d9617ecac9a [file] [log] [blame]
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001//===- llvm/MC/MCDisassembler.h - Disassembler interface --------*- C++ -*-===//
2//
Andrew Walbran16937d02019-10-22 13:54:20 +01003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01006//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
10#define LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
11
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/XCOFF.h"
#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
#include <cassert>
#include <cstdint>
#include <memory>
#include <tuple>
#include <vector>
Andrew Scull5e1ddfa2018-08-14 10:06:54 +010019
20namespace llvm {
21
Olivier Deprezf4ef2d02021-04-20 13:36:24 +020022struct XCOFFSymbolInfo {
23 Optional<XCOFF::StorageMappingClass> StorageMappingClass;
24 Optional<uint32_t> Index;
25 bool IsLabel;
26 XCOFFSymbolInfo(Optional<XCOFF::StorageMappingClass> Smc,
27 Optional<uint32_t> Idx, bool Label)
28 : StorageMappingClass(Smc), Index(Idx), IsLabel(Label) {}
29
30 bool operator<(const XCOFFSymbolInfo &SymInfo) const;
31};
32
33struct SymbolInfoTy {
34 uint64_t Addr;
35 StringRef Name;
36 union {
37 uint8_t Type;
38 XCOFFSymbolInfo XCOFFSymInfo;
39 };
40
41private:
42 bool IsXCOFF;
43
44public:
45 SymbolInfoTy(uint64_t Addr, StringRef Name,
46 Optional<XCOFF::StorageMappingClass> Smc, Optional<uint32_t> Idx,
47 bool Label)
48 : Addr(Addr), Name(Name), XCOFFSymInfo(Smc, Idx, Label), IsXCOFF(true) {}
49 SymbolInfoTy(uint64_t Addr, StringRef Name, uint8_t Type)
50 : Addr(Addr), Name(Name), Type(Type), IsXCOFF(false) {}
51 bool isXCOFF() const { return IsXCOFF; }
52
53private:
54 friend bool operator<(const SymbolInfoTy &P1, const SymbolInfoTy &P2) {
55 assert(P1.IsXCOFF == P2.IsXCOFF &&
56 "P1.IsXCOFF should be equal to P2.IsXCOFF.");
57 if (P1.IsXCOFF)
58 return std::tie(P1.Addr, P1.XCOFFSymInfo, P1.Name) <
59 std::tie(P2.Addr, P2.XCOFFSymInfo, P2.Name);
60
61 return std::tie(P1.Addr, P1.Name, P1.Type) <
62 std::tie(P2.Addr, P2.Name, P2.Type);
63 }
64};
65
66using SectionSymbolsTy = std::vector<SymbolInfoTy>;
67
Andrew Scull5e1ddfa2018-08-14 10:06:54 +010068template <typename T> class ArrayRef;
69class MCContext;
70class MCInst;
71class MCSubtargetInfo;
72class raw_ostream;
73
74/// Superclass for all disassemblers. Consumes a memory region and provides an
75/// array of assembly instructions.
76class MCDisassembler {
77public:
78 /// Ternary decode status. Most backends will just use Fail and
79 /// Success, however some have a concept of an instruction with
80 /// understandable semantics but which is architecturally
81 /// incorrect. An example of this is ARM UNPREDICTABLE instructions
82 /// which are disassemblable but cause undefined behaviour.
83 ///
84 /// Because it makes sense to disassemble these instructions, there
85 /// is a "soft fail" failure mode that indicates the MCInst& is
86 /// valid but architecturally incorrect.
87 ///
88 /// The enum numbers are deliberately chosen such that reduction
89 /// from Success->SoftFail ->Fail can be done with a simple
90 /// bitwise-AND:
91 ///
92 /// LEFT & TOP = | Success Unpredictable Fail
93 /// --------------+-----------------------------------
94 /// Success | Success Unpredictable Fail
95 /// Unpredictable | Unpredictable Unpredictable Fail
96 /// Fail | Fail Fail Fail
97 ///
98 /// An easy way of encoding this is as 0b11, 0b01, 0b00 for
99 /// Success, SoftFail, Fail respectively.
100 enum DecodeStatus {
101 Fail = 0,
102 SoftFail = 1,
103 Success = 3
104 };
105
106 MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
107 : Ctx(Ctx), STI(STI) {}
108
109 virtual ~MCDisassembler();
110
111 /// Returns the disassembly of a single instruction.
112 ///
113 /// \param Instr - An MCInst to populate with the contents of the
114 /// instruction.
115 /// \param Size - A value to populate with the size of the instruction, or
116 /// the number of bytes consumed while attempting to decode
117 /// an invalid instruction.
118 /// \param Address - The address, in the memory space of region, of the first
119 /// byte of the instruction.
120 /// \param Bytes - A reference to the actual bytes of the instruction.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100121 /// \param CStream - The stream to print comments and annotations on.
122 /// \return - MCDisassembler::Success if the instruction is valid,
123 /// MCDisassembler::SoftFail if the instruction was
124 /// disassemblable but invalid,
125 /// MCDisassembler::Fail if the instruction was invalid.
126 virtual DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
127 ArrayRef<uint8_t> Bytes, uint64_t Address,
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100128 raw_ostream &CStream) const = 0;
129
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200130 /// Used to perform separate target specific disassembly for a particular
131 /// symbol. May parse any prelude that precedes instructions after the
132 /// start of a symbol, or the entire symbol.
133 /// This is used for example by WebAssembly to decode preludes.
Andrew Walbran16937d02019-10-22 13:54:20 +0100134 ///
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200135 /// Base implementation returns None. So all targets by default ignore to
136 /// treat symbols separately.
137 ///
138 /// \param Symbol - The symbol.
Andrew Walbran16937d02019-10-22 13:54:20 +0100139 /// \param Size - The number of bytes consumed.
140 /// \param Address - The address, in the memory space of region, of the first
141 /// byte of the symbol.
142 /// \param Bytes - A reference to the actual bytes at the symbol location.
Andrew Walbran16937d02019-10-22 13:54:20 +0100143 /// \param CStream - The stream to print comments and annotations on.
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200144 /// \return - MCDisassembler::Success if bytes are decoded
145 /// successfully. Size must hold the number of bytes that
146 /// were decoded.
147 /// - MCDisassembler::Fail if the bytes are invalid. Size
148 /// must hold the number of bytes that were decoded before
149 /// failing. The target must print nothing. This can be
150 /// done by buffering the output if needed.
151 /// - None if the target doesn't want to handle the symbol
152 /// separately. Value of Size is ignored in this case.
153 virtual Optional<DecodeStatus>
154 onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes,
155 uint64_t Address, raw_ostream &CStream) const;
156 // TODO:
157 // Implement similar hooks that can be used at other points during
158 // disassembly. Something along the following lines:
159 // - onBeforeInstructionDecode()
160 // - onAfterInstructionDecode()
161 // - onSymbolEnd()
162 // It should help move much of the target specific code from llvm-objdump to
163 // respective target disassemblers.
Andrew Walbran16937d02019-10-22 13:54:20 +0100164
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100165private:
166 MCContext &Ctx;
167
168protected:
169 // Subtarget information, for instruction decoding predicates if required.
170 const MCSubtargetInfo &STI;
171 std::unique_ptr<MCSymbolizer> Symbolizer;
172
173public:
174 // Helpers around MCSymbolizer
175 bool tryAddingSymbolicOperand(MCInst &Inst,
176 int64_t Value,
177 uint64_t Address, bool IsBranch,
178 uint64_t Offset, uint64_t InstSize) const;
179
180 void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const;
181
182 /// Set \p Symzer as the current symbolizer.
183 /// This takes ownership of \p Symzer, and deletes the previously set one.
184 void setSymbolizer(std::unique_ptr<MCSymbolizer> Symzer);
185
186 MCContext& getContext() const { return Ctx; }
187
188 const MCSubtargetInfo& getSubtargetInfo() const { return STI; }
189
190 // Marked mutable because we cache it inside the disassembler, rather than
191 // having to pass it around as an argument through all the autogenerated code.
192 mutable raw_ostream *CommentStream = nullptr;
193};
194
195} // end namespace llvm
196
197#endif // LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H