blob: bb979422f0a5b0841a8fa5e7120343609fdfc342 [file] [log] [blame]
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001//===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
11#define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
12
13#include "llvm/ADT/StringRef.h"
14#include "llvm/MC/MCExpr.h"
15#include "llvm/MC/MCInstrInfo.h"
16#include "llvm/MC/MCParser/MCAsmLexer.h"
Andrew Scullcdfcccc2018-10-05 20:58:37 +010017#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
Andrew Scull5e1ddfa2018-08-14 10:06:54 +010018#include "llvm/MC/MCParser/MCAsmParserExtension.h"
19#include "llvm/MC/MCTargetOptions.h"
20#include "llvm/Support/SMLoc.h"
21#include <cstdint>
22#include <memory>
23
24namespace llvm {
25
// Forward declarations of types that the declarations in this header refer to.
class MCInst;
class MCParsedAsmOperand;
class MCStreamer;
class MCSubtargetInfo;
template <typename T> class SmallVectorImpl;

/// A SmallVectorImpl whose elements are uniquely-owned MCParsedAsmOperand
/// objects.
using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>;
33
/// The kinds of rewrite that can be recorded in an AsmRewrite.
///
/// AsmRewritePrecedence holds one entry per enumerator, in declaration order.
/// When adding a kind, add it before AOK_IntelExpr or update the static_assert
/// that follows the table.
enum AsmRewriteKind {
  AOK_Align,          // Rewrite align as .align.
  AOK_EVEN,           // Rewrite even as .even.
  AOK_Emit,           // Rewrite _emit as .byte.
  AOK_Input,          // Rewrite in terms of $N.
  AOK_Output,         // Rewrite in terms of $N.
  AOK_SizeDirective,  // Add a sizing directive (e.g., dword ptr).
  AOK_Label,          // Rewrite local labels.
  AOK_EndOfStatement, // Add EndOfStatement (e.g., "\n\t").
  AOK_Skip,           // Skip emission (e.g., offset/type operators).
  AOK_IntelExpr       // SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp]
};

/// Precedence value associated with each AsmRewriteKind; entry N belongs to
/// the enumerator whose value is N.
const char AsmRewritePrecedence [] = {
  2, // AOK_Align
  2, // AOK_EVEN
  2, // AOK_Emit
  3, // AOK_Input
  3, // AOK_Output
  5, // AOK_SizeDirective
  1, // AOK_Label
  5, // AOK_EndOfStatement
  2, // AOK_Skip
  2  // AOK_IntelExpr
};

// Catch the table and the enum drifting apart at compile time rather than
// through an out-of-bounds read when the table is indexed by kind.
static_assert(sizeof(AsmRewritePrecedence) / sizeof(AsmRewritePrecedence[0]) ==
                  AOK_IntelExpr + 1,
              "AsmRewritePrecedence must have one entry per AsmRewriteKind");
59
60// Represnt the various parts which makes up an intel expression,
61// used for emitting compound intel expressions
62struct IntelExpr {
63 bool NeedBracs;
64 int64_t Imm;
65 StringRef BaseReg;
66 StringRef IndexReg;
67 unsigned Scale;
68
69 IntelExpr(bool needBracs = false) : NeedBracs(needBracs), Imm(0),
70 BaseReg(StringRef()), IndexReg(StringRef()),
71 Scale(1) {}
72 // Compund immediate expression
73 IntelExpr(int64_t imm, bool needBracs) : IntelExpr(needBracs) {
74 Imm = imm;
75 }
76 // [Reg + ImmediateExpression]
77 // We don't bother to emit an immediate expression evaluated to zero
78 IntelExpr(StringRef reg, int64_t imm = 0, unsigned scale = 0,
79 bool needBracs = true) :
80 IntelExpr(imm, needBracs) {
81 IndexReg = reg;
82 if (scale)
83 Scale = scale;
84 }
85 // [BaseReg + IndexReg * ScaleExpression + ImmediateExpression]
86 IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale = 0,
87 int64_t imm = 0, bool needBracs = true) :
88 IntelExpr(indexReg, imm, scale, needBracs) {
89 BaseReg = baseReg;
90 }
91 bool hasBaseReg() const {
92 return BaseReg.size();
93 }
94 bool hasIndexReg() const {
95 return IndexReg.size();
96 }
97 bool hasRegs() const {
98 return hasBaseReg() || hasIndexReg();
99 }
100 bool isValid() const {
101 return (Scale == 1) ||
102 (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8));
103 }
104};
105
106struct AsmRewrite {
107 AsmRewriteKind Kind;
108 SMLoc Loc;
109 unsigned Len;
110 int64_t Val;
111 StringRef Label;
112 IntelExpr IntelExp;
113
114public:
115 AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0)
116 : Kind(kind), Loc(loc), Len(len), Val(val) {}
117 AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label)
118 : AsmRewrite(kind, loc, len) { Label = label; }
119 AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp)
120 : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; }
121};
122
123struct ParseInstructionInfo {
124 SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
125
126 ParseInstructionInfo() = default;
127 ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites)
128 : AsmRewrites(rewrites) {}
129};
130
/// Result of attempting to match a single operand.
enum OperandMatchResultTy {
  /// Operand matched successfully.
  MatchOperand_Success = 0,
  /// Operand did not match.
  MatchOperand_NoMatch = 1,
  /// Operand matched but had errors.
  MatchOperand_ParseFail = 2
};
136
/// The three outcomes a DiagnosticPredicate can report.
enum class DiagnosticPredicateTy {
  Match,
  NearMatch,
  NoMatch,
};

// When an operand is parsed, the assembler iterates through the set of operand
// classes the operand might belong to and calls each class's PredicateMethod
// to find out whether it matches.
//
// If two AsmOperands would each produce a specific diagnostic on a mismatch,
// there is otherwise no way to tell which of them is the closer match.
// DiagnosticPredicate separates 'completely no match' from 'near match', so
// the assembler can decide whether to emit the specific diagnostic or to fall
// back to 'InvalidOperand' and keep looking for a 'better matching'
// diagnostic.
//
// For example:
//    opcode opnd0, opnd1, opnd2
//
// where:
//    opnd2 could be an 'immediate of range [-8, 7]'
//    opnd2 could be a  'register + shift/extend'.
//
// If opnd2 is a valid register but carries a wrong shift/extend suffix, it
// makes little sense to report that the operand should be an immediate in
// range [-8, 7].
//
// This is a light-weight alternative to the 'NearMissInfo' approach, which
// collects *all* possible diagnostics. It is optional and fully backward
// compatible with existing PredicateMethods that return a 'bool' (match or
// no match).
struct DiagnosticPredicate {
  DiagnosticPredicateTy Type;

  // A plain bool can only express Match (true) or NearMatch (false); NoMatch
  // has to be requested through the DiagnosticPredicateTy constructor.
  explicit DiagnosticPredicate(bool Match)
      : Type(DiagnosticPredicateTy::NearMatch) {
    if (Match)
      Type = DiagnosticPredicateTy::Match;
  }
  DiagnosticPredicate(DiagnosticPredicateTy T) : Type(T) {}
  DiagnosticPredicate(const DiagnosticPredicate &) = default;

  // Converts to true only for an exact match.
  operator bool() const { return isMatch(); }
  bool isMatch() const { return DiagnosticPredicateTy::Match == Type; }
  bool isNearMatch() const { return DiagnosticPredicateTy::NearMatch == Type; }
  bool isNoMatch() const { return DiagnosticPredicateTy::NoMatch == Type; }
};
183
// When matching of an assembly instruction fails, several encodings may come
// close to matching. It is often ambiguous which one the programmer meant, so
// we want to report an error that mentions each of these "near-miss"
// encodings. This class describes one such encoding and the reason it did not
// match the parsed instruction.
class NearMissInfo {
public:
  enum NearMissKind {
    NoNearMiss,
    NearMissOperand,
    NearMissFeature,
    NearMissPredicate,
    NearMissTooFewOperands,
  };

  // The encoding is valid for the parsed assembly string. This is only used
  // internally to the table-generated assembly matcher.
  static NearMissInfo getSuccess() { return NearMissInfo(NoNearMiss); }

  // The encoding was rejected because it needs target features that are not
  // currently enabled. MissingFeatures has a bit set for each feature the
  // encoding needs but which is not enabled.
  static NearMissInfo getMissedFeature(uint64_t MissingFeatures) {
    NearMissInfo Info(NearMissFeature);
    Info.Features = MissingFeatures;
    return Info;
  }

  // The encoding was rejected because the target-specific predicate function
  // returned an error code. FailureCode is that target-specific error code.
  static NearMissInfo getMissedPredicate(unsigned FailureCode) {
    NearMissInfo Info(NearMissPredicate);
    Info.PredicateError = FailureCode;
    return Info;
  }

  // The encoding was rejected because one (and only one) parsed operand is
  // not of the correct type. OperandError is the error code relating to the
  // operand class expected by the encoding. OperandClass is the type of the
  // expected operand. Opcode is the opcode of the encoding. OperandIndex is
  // the index into the parsed operand list.
  static NearMissInfo getMissedOperand(unsigned OperandError,
                                       unsigned OperandClass, unsigned Opcode,
                                       unsigned OperandIndex) {
    NearMissInfo Info(NearMissOperand);
    Info.MissedOperand.Error = OperandError;
    Info.MissedOperand.Class = OperandClass;
    Info.MissedOperand.Opcode = Opcode;
    Info.MissedOperand.Index = OperandIndex;
    return Info;
  }

  // The encoding was rejected because it expects more operands than were
  // parsed. OperandClass is the class of the expected operand that was not
  // provided. Opcode is the instruction encoding.
  static NearMissInfo getTooFewOperands(unsigned OperandClass,
                                        unsigned Opcode) {
    NearMissInfo Info(NearMissTooFewOperands);
    Info.TooFewOperands.Class = OperandClass;
    Info.TooFewOperands.Opcode = Opcode;
    return Info;
  }

  // True for every kind except NoNearMiss.
  operator bool() const { return Kind != NoNearMiss; }

  NearMissKind getKind() const { return Kind; }

  // Feature flags required by the instruction that the current target does
  // not have.
  uint64_t getFeatures() const {
    assert(Kind == NearMissFeature);
    return Features;
  }

  // Error code returned by the target predicate when validating this
  // instruction encoding.
  unsigned getPredicateError() const {
    assert(Kind == NearMissPredicate);
    return PredicateError;
  }

  // MatchClassKind of the operand that we expected to see.
  unsigned getOperandClass() const {
    assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
    // Read through MissedOperand for both kinds; see the note on the structs.
    return MissedOperand.Class;
  }

  // Opcode of the encoding we were trying to match.
  unsigned getOpcode() const {
    assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
    // Read through MissedOperand for both kinds; see the note on the structs.
    return MissedOperand.Opcode;
  }

  // Error code returned when validating the operand.
  unsigned getOperandError() const {
    assert(Kind == NearMissOperand);
    return MissedOperand.Error;
  }

  // Index of the actual operand we were trying to match in the list of parsed
  // operands.
  unsigned getOperandIndex() const {
    assert(Kind == NearMissOperand);
    return MissedOperand.Index;
  }

private:
  // These two structs share a common prefix (Class, Opcode), so we can safely
  // rely on the fact that they overlap in the union.
  struct MissedOpInfo {
    unsigned Class;
    unsigned Opcode;
    unsigned Error;
    unsigned Index;
  };

  struct TooFewOperandsInfo {
    unsigned Class;
    unsigned Opcode;
  };

  // Only the factory functions above create instances. The union payload is
  // left untouched here and filled in by the factory for the given kind.
  explicit NearMissInfo(NearMissKind K) : Kind(K) {}

  NearMissKind Kind;

  // Payload; which member is meaningful depends on Kind.
  union {
    uint64_t Features;
    unsigned PredicateError;
    MissedOpInfo MissedOperand;
    TooFewOperandsInfo TooFewOperands;
  };
};
316
317/// MCTargetAsmParser - Generic interface to target specific assembly parsers.
318class MCTargetAsmParser : public MCAsmParserExtension {
319public:
320 enum MatchResultTy {
321 Match_InvalidOperand,
322 Match_InvalidTiedOperand,
323 Match_MissingFeature,
324 Match_MnemonicFail,
325 Match_Success,
326 Match_NearMisses,
327 FIRST_TARGET_MATCH_RESULT_TY
328 };
329
330protected: // Can only create subclasses.
331 MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI,
332 const MCInstrInfo &MII);
333
334 /// Create a copy of STI and return a non-const reference to it.
335 MCSubtargetInfo &copySTI();
336
337 /// AvailableFeatures - The current set of available features.
338 uint64_t AvailableFeatures = 0;
339
340 /// ParsingInlineAsm - Are we parsing ms-style inline assembly?
341 bool ParsingInlineAsm = false;
342
343 /// SemaCallback - The Sema callback implementation. Must be set when parsing
344 /// ms-style inline assembly.
345 MCAsmParserSemaCallback *SemaCallback;
346
347 /// Set of options which affects instrumentation of inline assembly.
348 MCTargetOptions MCOptions;
349
350 /// Current STI.
351 const MCSubtargetInfo *STI;
352
353 const MCInstrInfo &MII;
354
355public:
356 MCTargetAsmParser(const MCTargetAsmParser &) = delete;
357 MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete;
358
359 ~MCTargetAsmParser() override;
360
361 const MCSubtargetInfo &getSTI() const;
362
363 uint64_t getAvailableFeatures() const { return AvailableFeatures; }
364 void setAvailableFeatures(uint64_t Value) { AvailableFeatures = Value; }
365
366 bool isParsingInlineAsm () { return ParsingInlineAsm; }
367 void setParsingInlineAsm (bool Value) { ParsingInlineAsm = Value; }
368
369 MCTargetOptions getTargetOptions() const { return MCOptions; }
370
371 void setSemaCallback(MCAsmParserSemaCallback *Callback) {
372 SemaCallback = Callback;
373 }
374
Andrew Scull0372a572018-11-16 15:47:06 +0000375 // Target-specific parsing of expression.
376 virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
377 return getParser().parsePrimaryExpr(Res, EndLoc);
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100378 }
379
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100380 virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
381 SMLoc &EndLoc) = 0;
382
383 /// Sets frame register corresponding to the current MachineFunction.
384 virtual void SetFrameRegister(unsigned RegNo) {}
385
386 /// ParseInstruction - Parse one assembly instruction.
387 ///
388 /// The parser is positioned following the instruction name. The target
389 /// specific instruction parser should parse the entire instruction and
390 /// construct the appropriate MCInst, or emit an error. On success, the entire
391 /// line should be parsed up to and including the end-of-statement token. On
392 /// failure, the parser is not required to read to the end of the line.
393 //
394 /// \param Name - The instruction name.
395 /// \param NameLoc - The source location of the name.
396 /// \param Operands [out] - The list of parsed operands, this returns
397 /// ownership of them to the caller.
398 /// \return True on failure.
399 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
400 SMLoc NameLoc, OperandVector &Operands) = 0;
401 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
402 AsmToken Token, OperandVector &Operands) {
403 return ParseInstruction(Info, Name, Token.getLoc(), Operands);
404 }
405
406 /// ParseDirective - Parse a target specific assembler directive
407 ///
408 /// The parser is positioned following the directive name. The target
409 /// specific directive parser should parse the entire directive doing or
410 /// recording any target specific work, or return true and do nothing if the
411 /// directive is not target specific. If the directive is specific for
412 /// the target, the entire line is parsed up to and including the
413 /// end-of-statement token and false is returned.
414 ///
415 /// \param DirectiveID - the identifier token of the directive.
416 virtual bool ParseDirective(AsmToken DirectiveID) = 0;
417
418 /// MatchAndEmitInstruction - Recognize a series of operands of a parsed
419 /// instruction as an actual MCInst and emit it to the specified MCStreamer.
420 /// This returns false on success and returns true on failure to match.
421 ///
422 /// On failure, the target parser is responsible for emitting a diagnostic
423 /// explaining the match failure.
424 virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
425 OperandVector &Operands, MCStreamer &Out,
426 uint64_t &ErrorInfo,
427 bool MatchingInlineAsm) = 0;
428
429 /// Allows targets to let registers opt out of clobber lists.
430 virtual bool OmitRegisterFromClobberLists(unsigned RegNo) { return false; }
431
432 /// Allow a target to add special case operand matching for things that
433 /// tblgen doesn't/can't handle effectively. For example, literal
434 /// immediates on ARM. TableGen expects a token operand, but the parser
435 /// will recognize them as immediates.
436 virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
437 unsigned Kind) {
438 return Match_InvalidOperand;
439 }
440
441 /// Validate the instruction match against any complex target predicates
442 /// before rendering any operands to it.
443 virtual unsigned
444 checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) {
445 return Match_Success;
446 }
447
448 /// checkTargetMatchPredicate - Validate the instruction match against
449 /// any complex target predicates not expressible via match classes.
450 virtual unsigned checkTargetMatchPredicate(MCInst &Inst) {
451 return Match_Success;
452 }
453
454 virtual void convertToMapAndConstraints(unsigned Kind,
455 const OperandVector &Operands) = 0;
456
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100457 /// Returns whether two registers are equal and is used by the tied-operands
458 /// checks in the AsmMatcher. This method can be overridden allow e.g. a
459 /// sub- or super-register as the tied operand.
460 virtual bool regsEqual(const MCParsedAsmOperand &Op1,
461 const MCParsedAsmOperand &Op2) const {
462 assert(Op1.isReg() && Op2.isReg() && "Operands not all regs");
463 return Op1.getReg() == Op2.getReg();
464 }
465
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100466 // Return whether this parser uses assignment statements with equals tokens
467 virtual bool equalIsAsmAssignment() { return true; };
468 // Return whether this start of statement identifier is a label
469 virtual bool isLabel(AsmToken &Token) { return true; };
470 // Return whether this parser accept star as start of statement
471 virtual bool starIsStartOfStatement() { return false; };
472
473 virtual const MCExpr *applyModifierToExpr(const MCExpr *E,
474 MCSymbolRefExpr::VariantKind,
475 MCContext &Ctx) {
476 return nullptr;
477 }
478
Andrew Scull0372a572018-11-16 15:47:06 +0000479 // For actions that have to be performed before a label is emitted
480 virtual void doBeforeLabelEmit(MCSymbol *Symbol) {}
481
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100482 virtual void onLabelParsed(MCSymbol *Symbol) {}
483
484 /// Ensure that all previously parsed instructions have been emitted to the
485 /// output streamer, if the target does not emit them immediately.
486 virtual void flushPendingInstructions(MCStreamer &Out) {}
487
488 virtual const MCExpr *createTargetUnaryExpr(const MCExpr *E,
489 AsmToken::TokenKind OperatorToken,
490 MCContext &Ctx) {
491 return nullptr;
492 }
493};
494
495} // end namespace llvm
496
497#endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H