Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame^] | 1 | //===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | |
| 10 | #ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H |
| 11 | #define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H |
| 12 | |
| 13 | #include "llvm/ADT/StringRef.h" |
| 14 | #include "llvm/MC/MCExpr.h" |
| 15 | #include "llvm/MC/MCInstrInfo.h" |
| 16 | #include "llvm/MC/MCParser/MCAsmLexer.h" |
| 17 | #include "llvm/MC/MCParser/MCAsmParserExtension.h" |
| 18 | #include "llvm/MC/MCTargetOptions.h" |
| 19 | #include "llvm/Support/SMLoc.h" |
| 20 | #include <cstdint> |
| 21 | #include <memory> |
| 22 | |
| 23 | namespace llvm { |
| 24 | |
// Forward declarations. In this header these types are only referenced
// through pointers, references, or as template arguments, so their full
// definitions are not needed here.
class MCInst;
class MCParsedAsmOperand;
class MCStreamer;
class MCSubtargetInfo;
template <typename T> class SmallVectorImpl;

// The list of operands produced while parsing one instruction. Each operand
// is held through a std::unique_ptr.
using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>;
| 32 | |
/// Kinds of assembly rewrite (the value stored in AsmRewrite::Kind).
///
/// The enumerators are used as indices into AsmRewritePrecedence, so the
/// order here and the order of that table must stay in sync. AOK_IntelExpr
/// must remain the last enumerator; the static_assert below relies on it.
enum AsmRewriteKind {
  AOK_Align,          // Rewrite align as .align.
  AOK_EVEN,           // Rewrite even as .even.
  AOK_Emit,           // Rewrite _emit as .byte.
  AOK_Input,          // Rewrite in terms of $N.
  AOK_Output,         // Rewrite in terms of $N.
  AOK_SizeDirective,  // Add a sizing directive (e.g., dword ptr).
  AOK_Label,          // Rewrite local labels.
  AOK_EndOfStatement, // Add EndOfStatement (e.g., "\n\t").
  AOK_Skip,           // Skip emission (e.g., offset/type operators).
  AOK_IntelExpr       // SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp]
};

/// Precedence value for each rewrite kind, indexed by AsmRewriteKind.
const char AsmRewritePrecedence [] = {
  2, // AOK_Align
  2, // AOK_EVEN
  2, // AOK_Emit
  3, // AOK_Input
  3, // AOK_Output
  5, // AOK_SizeDirective
  1, // AOK_Label
  5, // AOK_EndOfStatement
  2, // AOK_Skip
  2  // AOK_IntelExpr
};

// Adding a rewrite kind without a matching precedence entry would make
// AsmRewritePrecedence[Kind] read past the end of the table; catch that at
// compile time instead.
static_assert(sizeof(AsmRewritePrecedence) / sizeof(AsmRewritePrecedence[0]) ==
                  static_cast<unsigned>(AOK_IntelExpr) + 1,
              "AsmRewritePrecedence must have one entry per AsmRewriteKind");
| 58 | |
| 59 | // Represnt the various parts which makes up an intel expression, |
| 60 | // used for emitting compound intel expressions |
| 61 | struct IntelExpr { |
| 62 | bool NeedBracs; |
| 63 | int64_t Imm; |
| 64 | StringRef BaseReg; |
| 65 | StringRef IndexReg; |
| 66 | unsigned Scale; |
| 67 | |
| 68 | IntelExpr(bool needBracs = false) : NeedBracs(needBracs), Imm(0), |
| 69 | BaseReg(StringRef()), IndexReg(StringRef()), |
| 70 | Scale(1) {} |
| 71 | // Compund immediate expression |
| 72 | IntelExpr(int64_t imm, bool needBracs) : IntelExpr(needBracs) { |
| 73 | Imm = imm; |
| 74 | } |
| 75 | // [Reg + ImmediateExpression] |
| 76 | // We don't bother to emit an immediate expression evaluated to zero |
| 77 | IntelExpr(StringRef reg, int64_t imm = 0, unsigned scale = 0, |
| 78 | bool needBracs = true) : |
| 79 | IntelExpr(imm, needBracs) { |
| 80 | IndexReg = reg; |
| 81 | if (scale) |
| 82 | Scale = scale; |
| 83 | } |
| 84 | // [BaseReg + IndexReg * ScaleExpression + ImmediateExpression] |
| 85 | IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale = 0, |
| 86 | int64_t imm = 0, bool needBracs = true) : |
| 87 | IntelExpr(indexReg, imm, scale, needBracs) { |
| 88 | BaseReg = baseReg; |
| 89 | } |
| 90 | bool hasBaseReg() const { |
| 91 | return BaseReg.size(); |
| 92 | } |
| 93 | bool hasIndexReg() const { |
| 94 | return IndexReg.size(); |
| 95 | } |
| 96 | bool hasRegs() const { |
| 97 | return hasBaseReg() || hasIndexReg(); |
| 98 | } |
| 99 | bool isValid() const { |
| 100 | return (Scale == 1) || |
| 101 | (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8)); |
| 102 | } |
| 103 | }; |
| 104 | |
| 105 | struct AsmRewrite { |
| 106 | AsmRewriteKind Kind; |
| 107 | SMLoc Loc; |
| 108 | unsigned Len; |
| 109 | int64_t Val; |
| 110 | StringRef Label; |
| 111 | IntelExpr IntelExp; |
| 112 | |
| 113 | public: |
| 114 | AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0) |
| 115 | : Kind(kind), Loc(loc), Len(len), Val(val) {} |
| 116 | AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label) |
| 117 | : AsmRewrite(kind, loc, len) { Label = label; } |
| 118 | AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp) |
| 119 | : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; } |
| 120 | }; |
| 121 | |
| 122 | struct ParseInstructionInfo { |
| 123 | SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr; |
| 124 | |
| 125 | ParseInstructionInfo() = default; |
| 126 | ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites) |
| 127 | : AsmRewrites(rewrites) {} |
| 128 | }; |
| 129 | |
// Outcome of attempting to match a single operand.
enum OperandMatchResultTy {
  // The operand matched successfully.
  MatchOperand_Success,
  // The operand did not match.
  MatchOperand_NoMatch,
  // The operand matched but had errors.
  MatchOperand_ParseFail
};
| 135 | |
// A failed instruction match may still have come close to several encodings,
// and it is often unclear which of them the programmer meant. To report an
// error that mentions each such "near-miss" encoding, one NearMissInfo is
// produced per encoding, recording why that encoding did not match the parsed
// instruction.
class NearMissInfo {
public:
  enum NearMissKind {
    NoNearMiss,
    NearMissOperand,
    NearMissFeature,
    NearMissPredicate,
    NearMissTooFewOperands,
  };

  // The encoding is valid for the parsed assembly string. This is only used
  // internally to the table-generated assembly matcher.
  static NearMissInfo getSuccess() { return NearMissInfo(); }

  // The encoding needs target features that are not currently enabled.
  // MissingFeatures has one bit set for each required-but-disabled feature.
  static NearMissInfo getMissedFeature(uint64_t MissingFeatures) {
    NearMissInfo Info(NearMissFeature);
    Info.Features = MissingFeatures;
    return Info;
  }

  // The target-specific predicate function rejected the encoding.
  // FailureCode is the target-specific error code that predicate returned.
  static NearMissInfo getMissedPredicate(unsigned FailureCode) {
    NearMissInfo Info(NearMissPredicate);
    Info.PredicateError = FailureCode;
    return Info;
  }

  // Exactly one parsed operand is of the wrong type for the encoding.
  //   OperandError - error code relating to the operand class the encoding
  //                  expected.
  //   OperandClass - the type of the expected operand.
  //   Opcode       - the opcode of the encoding.
  //   OperandIndex - index into the parsed operand list.
  static NearMissInfo getMissedOperand(unsigned OperandError,
                                       unsigned OperandClass, unsigned Opcode,
                                       unsigned OperandIndex) {
    NearMissInfo Info(NearMissOperand);
    Info.MissedOperand.Class = OperandClass;
    Info.MissedOperand.Opcode = Opcode;
    Info.MissedOperand.Error = OperandError;
    Info.MissedOperand.Index = OperandIndex;
    return Info;
  }

  // The encoding expects more operands than were parsed. OperandClass is the
  // class of the expected operand that was not provided; Opcode is the
  // instruction encoding.
  static NearMissInfo getTooFewOperands(unsigned OperandClass,
                                        unsigned Opcode) {
    NearMissInfo Info(NearMissTooFewOperands);
    Info.TooFewOperands.Class = OperandClass;
    Info.TooFewOperands.Opcode = Opcode;
    return Info;
  }

  // True for every kind except NoNearMiss (i.e. false only for getSuccess()).
  operator bool() const { return Kind != NoNearMiss; }

  NearMissKind getKind() const { return Kind; }

  // Feature flags required by the instruction that the current target does
  // not have. Only meaningful for NearMissFeature.
  uint64_t getFeatures() const {
    assert(Kind == NearMissFeature);
    return Features;
  }

  // Error code returned by the target predicate when validating this
  // instruction encoding. Only meaningful for NearMissPredicate.
  unsigned getPredicateError() const {
    assert(Kind == NearMissPredicate);
    return PredicateError;
  }

  // MatchClassKind of the operand that we expected to see. Shared by the
  // NearMissOperand and NearMissTooFewOperands kinds.
  unsigned getOperandClass() const {
    assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
    return MissedOperand.Class;
  }

  // Opcode of the encoding we were trying to match. Shared by the
  // NearMissOperand and NearMissTooFewOperands kinds.
  unsigned getOpcode() const {
    assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
    return MissedOperand.Opcode;
  }

  // Error code returned when validating the operand. Only meaningful for
  // NearMissOperand.
  unsigned getOperandError() const {
    assert(Kind == NearMissOperand);
    return MissedOperand.Error;
  }

  // Index of the actual operand we were trying to match in the list of parsed
  // operands. Only meaningful for NearMissOperand.
  unsigned getOperandIndex() const {
    assert(Kind == NearMissOperand);
    return MissedOperand.Index;
  }

private:
  NearMissKind Kind;

  // MissedOpInfo and TooFewOperandsInfo begin with the same two fields, so
  // Class and Opcode may be read through either one while they overlap in the
  // union below. getOperandClass() and getOpcode() depend on this.
  struct MissedOpInfo {
    unsigned Class;
    unsigned Opcode;
    unsigned Error;
    unsigned Index;
  };

  struct TooFewOperandsInfo {
    unsigned Class;
    unsigned Opcode;
  };

  // Payload; which member is meaningful is determined by Kind.
  union {
    uint64_t Features;
    unsigned PredicateError;
    MissedOpInfo MissedOperand;
    TooFewOperandsInfo TooFewOperands;
  };

  // Tag-only construction; the payload is filled in by the static factories.
  explicit NearMissInfo(NearMissKind K) : Kind(K) {}

  // Instances are only created through the static factories above.
  NearMissInfo() : NearMissInfo(NoNearMiss) {}
};
| 268 | |
| 269 | /// MCTargetAsmParser - Generic interface to target specific assembly parsers. |
| 270 | class MCTargetAsmParser : public MCAsmParserExtension { |
| 271 | public: |
| 272 | enum MatchResultTy { |
| 273 | Match_InvalidOperand, |
| 274 | Match_InvalidTiedOperand, |
| 275 | Match_MissingFeature, |
| 276 | Match_MnemonicFail, |
| 277 | Match_Success, |
| 278 | Match_NearMisses, |
| 279 | FIRST_TARGET_MATCH_RESULT_TY |
| 280 | }; |
| 281 | |
| 282 | protected: // Can only create subclasses. |
| 283 | MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI, |
| 284 | const MCInstrInfo &MII); |
| 285 | |
| 286 | /// Create a copy of STI and return a non-const reference to it. |
| 287 | MCSubtargetInfo ©STI(); |
| 288 | |
| 289 | /// AvailableFeatures - The current set of available features. |
| 290 | uint64_t AvailableFeatures = 0; |
| 291 | |
| 292 | /// ParsingInlineAsm - Are we parsing ms-style inline assembly? |
| 293 | bool ParsingInlineAsm = false; |
| 294 | |
| 295 | /// SemaCallback - The Sema callback implementation. Must be set when parsing |
| 296 | /// ms-style inline assembly. |
| 297 | MCAsmParserSemaCallback *SemaCallback; |
| 298 | |
| 299 | /// Set of options which affects instrumentation of inline assembly. |
| 300 | MCTargetOptions MCOptions; |
| 301 | |
| 302 | /// Current STI. |
| 303 | const MCSubtargetInfo *STI; |
| 304 | |
| 305 | const MCInstrInfo &MII; |
| 306 | |
| 307 | public: |
| 308 | MCTargetAsmParser(const MCTargetAsmParser &) = delete; |
| 309 | MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete; |
| 310 | |
| 311 | ~MCTargetAsmParser() override; |
| 312 | |
| 313 | const MCSubtargetInfo &getSTI() const; |
| 314 | |
| 315 | uint64_t getAvailableFeatures() const { return AvailableFeatures; } |
| 316 | void setAvailableFeatures(uint64_t Value) { AvailableFeatures = Value; } |
| 317 | |
| 318 | bool isParsingInlineAsm () { return ParsingInlineAsm; } |
| 319 | void setParsingInlineAsm (bool Value) { ParsingInlineAsm = Value; } |
| 320 | |
| 321 | MCTargetOptions getTargetOptions() const { return MCOptions; } |
| 322 | |
| 323 | void setSemaCallback(MCAsmParserSemaCallback *Callback) { |
| 324 | SemaCallback = Callback; |
| 325 | } |
| 326 | |
| 327 | virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, |
| 328 | SMLoc &EndLoc) = 0; |
| 329 | |
| 330 | /// Sets frame register corresponding to the current MachineFunction. |
| 331 | virtual void SetFrameRegister(unsigned RegNo) {} |
| 332 | |
| 333 | /// ParseInstruction - Parse one assembly instruction. |
| 334 | /// |
| 335 | /// The parser is positioned following the instruction name. The target |
| 336 | /// specific instruction parser should parse the entire instruction and |
| 337 | /// construct the appropriate MCInst, or emit an error. On success, the entire |
| 338 | /// line should be parsed up to and including the end-of-statement token. On |
| 339 | /// failure, the parser is not required to read to the end of the line. |
| 340 | // |
| 341 | /// \param Name - The instruction name. |
| 342 | /// \param NameLoc - The source location of the name. |
| 343 | /// \param Operands [out] - The list of parsed operands, this returns |
| 344 | /// ownership of them to the caller. |
| 345 | /// \return True on failure. |
| 346 | virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, |
| 347 | SMLoc NameLoc, OperandVector &Operands) = 0; |
| 348 | virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, |
| 349 | AsmToken Token, OperandVector &Operands) { |
| 350 | return ParseInstruction(Info, Name, Token.getLoc(), Operands); |
| 351 | } |
| 352 | |
| 353 | /// ParseDirective - Parse a target specific assembler directive |
| 354 | /// |
| 355 | /// The parser is positioned following the directive name. The target |
| 356 | /// specific directive parser should parse the entire directive doing or |
| 357 | /// recording any target specific work, or return true and do nothing if the |
| 358 | /// directive is not target specific. If the directive is specific for |
| 359 | /// the target, the entire line is parsed up to and including the |
| 360 | /// end-of-statement token and false is returned. |
| 361 | /// |
| 362 | /// \param DirectiveID - the identifier token of the directive. |
| 363 | virtual bool ParseDirective(AsmToken DirectiveID) = 0; |
| 364 | |
| 365 | /// MatchAndEmitInstruction - Recognize a series of operands of a parsed |
| 366 | /// instruction as an actual MCInst and emit it to the specified MCStreamer. |
| 367 | /// This returns false on success and returns true on failure to match. |
| 368 | /// |
| 369 | /// On failure, the target parser is responsible for emitting a diagnostic |
| 370 | /// explaining the match failure. |
| 371 | virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, |
| 372 | OperandVector &Operands, MCStreamer &Out, |
| 373 | uint64_t &ErrorInfo, |
| 374 | bool MatchingInlineAsm) = 0; |
| 375 | |
| 376 | /// Allows targets to let registers opt out of clobber lists. |
| 377 | virtual bool OmitRegisterFromClobberLists(unsigned RegNo) { return false; } |
| 378 | |
| 379 | /// Allow a target to add special case operand matching for things that |
| 380 | /// tblgen doesn't/can't handle effectively. For example, literal |
| 381 | /// immediates on ARM. TableGen expects a token operand, but the parser |
| 382 | /// will recognize them as immediates. |
| 383 | virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, |
| 384 | unsigned Kind) { |
| 385 | return Match_InvalidOperand; |
| 386 | } |
| 387 | |
| 388 | /// Validate the instruction match against any complex target predicates |
| 389 | /// before rendering any operands to it. |
| 390 | virtual unsigned |
| 391 | checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) { |
| 392 | return Match_Success; |
| 393 | } |
| 394 | |
| 395 | /// checkTargetMatchPredicate - Validate the instruction match against |
| 396 | /// any complex target predicates not expressible via match classes. |
| 397 | virtual unsigned checkTargetMatchPredicate(MCInst &Inst) { |
| 398 | return Match_Success; |
| 399 | } |
| 400 | |
| 401 | virtual void convertToMapAndConstraints(unsigned Kind, |
| 402 | const OperandVector &Operands) = 0; |
| 403 | |
| 404 | // Return whether this parser uses assignment statements with equals tokens |
| 405 | virtual bool equalIsAsmAssignment() { return true; }; |
| 406 | // Return whether this start of statement identifier is a label |
| 407 | virtual bool isLabel(AsmToken &Token) { return true; }; |
| 408 | // Return whether this parser accept star as start of statement |
| 409 | virtual bool starIsStartOfStatement() { return false; }; |
| 410 | |
| 411 | virtual const MCExpr *applyModifierToExpr(const MCExpr *E, |
| 412 | MCSymbolRefExpr::VariantKind, |
| 413 | MCContext &Ctx) { |
| 414 | return nullptr; |
| 415 | } |
| 416 | |
| 417 | virtual void onLabelParsed(MCSymbol *Symbol) {} |
| 418 | |
| 419 | /// Ensure that all previously parsed instructions have been emitted to the |
| 420 | /// output streamer, if the target does not emit them immediately. |
| 421 | virtual void flushPendingInstructions(MCStreamer &Out) {} |
| 422 | |
| 423 | virtual const MCExpr *createTargetUnaryExpr(const MCExpr *E, |
| 424 | AsmToken::TokenKind OperatorToken, |
| 425 | MCContext &Ctx) { |
| 426 | return nullptr; |
| 427 | } |
| 428 | }; |
| 429 | |
| 430 | } // end namespace llvm |
| 431 | |
| 432 | #endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H |