//===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9
10#ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
11#define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
12
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Support/SMLoc.h"
#include <cassert>
#include <cstdint>
#include <memory>
22
23namespace llvm {
24
// Forward declarations: this header only needs these types by name.
class MCInst;
class MCParsedAsmOperand;
class MCStreamer;
class MCSubtargetInfo;
template <typename T> class SmallVectorImpl;

/// List of parsed operands; each element is held through a std::unique_ptr.
using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>;
32
/// Identifies what kind of textual rewrite an AsmRewrite entry describes.
///
/// The enumerator order is significant: AsmRewritePrecedence holds one entry
/// per enumerator, listed in the same order, so the two must be kept in sync.
enum AsmRewriteKind {
  AOK_Align,          // Rewrite align as .align.
  AOK_EVEN,           // Rewrite even as .even.
  AOK_Emit,           // Rewrite _emit as .byte.
  AOK_Input,          // Rewrite in terms of $N.
  AOK_Output,         // Rewrite in terms of $N.
  AOK_SizeDirective,  // Add a sizing directive (e.g., dword ptr).
  AOK_Label,          // Rewrite local labels.
  AOK_EndOfStatement, // Add EndOfStatement (e.g., "\n\t").
  AOK_Skip,           // Skip emission (e.g., offset/type operators).
  AOK_IntelExpr       // SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp]
};
45
// Precedence value for each rewrite kind. The table has one entry per
// AsmRewriteKind enumerator, in declaration order, so it can be indexed
// directly by the enumerator; adding or reordering an enumerator requires the
// matching change here.
// NOTE(review): how the values are compared (and how ties are resolved) is
// decided by the consumer of this table, which is not part of this header.
const char AsmRewritePrecedence[] = {
  2, // AOK_Align
  2, // AOK_EVEN
  2, // AOK_Emit
  3, // AOK_Input
  3, // AOK_Output
  5, // AOK_SizeDirective
  1, // AOK_Label
  5, // AOK_EndOfStatement
  2, // AOK_Skip
  2  // AOK_IntelExpr
};
58
59// Represnt the various parts which makes up an intel expression,
60// used for emitting compound intel expressions
61struct IntelExpr {
62 bool NeedBracs;
63 int64_t Imm;
64 StringRef BaseReg;
65 StringRef IndexReg;
66 unsigned Scale;
67
68 IntelExpr(bool needBracs = false) : NeedBracs(needBracs), Imm(0),
69 BaseReg(StringRef()), IndexReg(StringRef()),
70 Scale(1) {}
71 // Compund immediate expression
72 IntelExpr(int64_t imm, bool needBracs) : IntelExpr(needBracs) {
73 Imm = imm;
74 }
75 // [Reg + ImmediateExpression]
76 // We don't bother to emit an immediate expression evaluated to zero
77 IntelExpr(StringRef reg, int64_t imm = 0, unsigned scale = 0,
78 bool needBracs = true) :
79 IntelExpr(imm, needBracs) {
80 IndexReg = reg;
81 if (scale)
82 Scale = scale;
83 }
84 // [BaseReg + IndexReg * ScaleExpression + ImmediateExpression]
85 IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale = 0,
86 int64_t imm = 0, bool needBracs = true) :
87 IntelExpr(indexReg, imm, scale, needBracs) {
88 BaseReg = baseReg;
89 }
90 bool hasBaseReg() const {
91 return BaseReg.size();
92 }
93 bool hasIndexReg() const {
94 return IndexReg.size();
95 }
96 bool hasRegs() const {
97 return hasBaseReg() || hasIndexReg();
98 }
99 bool isValid() const {
100 return (Scale == 1) ||
101 (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8));
102 }
103};
104
105struct AsmRewrite {
106 AsmRewriteKind Kind;
107 SMLoc Loc;
108 unsigned Len;
109 int64_t Val;
110 StringRef Label;
111 IntelExpr IntelExp;
112
113public:
114 AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0)
115 : Kind(kind), Loc(loc), Len(len), Val(val) {}
116 AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label)
117 : AsmRewrite(kind, loc, len) { Label = label; }
118 AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp)
119 : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; }
120};
121
122struct ParseInstructionInfo {
123 SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
124
125 ParseInstructionInfo() = default;
126 ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites)
127 : AsmRewrites(rewrites) {}
128};
129
// Three-way outcome of trying to match a single operand.
enum OperandMatchResultTy {
  MatchOperand_Success,  // operand matched successfully
  MatchOperand_NoMatch,  // operand did not match
  MatchOperand_ParseFail // operand matched but had errors
};
135
// When matching of an assembly instruction fails, there may be multiple
// encodings that are close to being a match. It's often ambiguous which one
// the programmer intended to use, so we want to report an error which mentions
// each of these "near-miss" encodings. This class contains information about
// one such encoding, and why it did not match the parsed instruction.
//
// Instances are created only through the static get*() factories; the kind
// chosen by the factory determines which accessors may be called.
class NearMissInfo {
public:
  enum NearMissKind {
    NoNearMiss,
    NearMissOperand,
    NearMissFeature,
    NearMissPredicate,
    NearMissTooFewOperands,
  };

  // The encoding is valid for the parsed assembly string. This is only used
  // internally to the table-generated assembly matcher.
  static NearMissInfo getSuccess() { return NearMissInfo(); }

  // The instruction encoding is not valid because it requires some target
  // features that are not currently enabled. MissingFeatures has a bit set for
  // each feature that the encoding needs but which is not enabled.
  static NearMissInfo getMissedFeature(uint64_t MissingFeatures) {
    NearMissInfo Result;
    Result.Kind = NearMissFeature;
    Result.Features = MissingFeatures;
    return Result;
  }

  // The instruction encoding is not valid because the target-specific
  // predicate function returned an error code. FailureCode is the
  // target-specific error code returned by the predicate.
  static NearMissInfo getMissedPredicate(unsigned FailureCode) {
    NearMissInfo Result;
    Result.Kind = NearMissPredicate;
    Result.PredicateError = FailureCode;
    return Result;
  }

  // The instruction encoding is not valid because one (and only one) parsed
  // operand is not of the correct type. OperandError is the error code
  // relating to the operand class expected by the encoding. OperandClass is
  // the type of the expected operand. Opcode is the opcode of the encoding.
  // OperandIndex is the index into the parsed operand list.
  static NearMissInfo getMissedOperand(unsigned OperandError,
                                       unsigned OperandClass, unsigned Opcode,
                                       unsigned OperandIndex) {
    NearMissInfo Result;
    Result.Kind = NearMissOperand;
    Result.MissedOperand.Error = OperandError;
    Result.MissedOperand.Class = OperandClass;
    Result.MissedOperand.Opcode = Opcode;
    Result.MissedOperand.Index = OperandIndex;
    return Result;
  }

  // The instruction encoding is not valid because it expects more operands
  // than were parsed. OperandClass is the class of the expected operand that
  // was not provided. Opcode is the instruction encoding.
  static NearMissInfo getTooFewOperands(unsigned OperandClass,
                                        unsigned Opcode) {
    NearMissInfo Result;
    Result.Kind = NearMissTooFewOperands;
    Result.TooFewOperands.Class = OperandClass;
    Result.TooFewOperands.Opcode = Opcode;
    return Result;
  }

  // True for every kind except NoNearMiss, i.e. false only for the value
  // returned by getSuccess().
  // NOTE(review): this conversion is implicit; making it explicit would be
  // safer but could break existing callers, so it is left unchanged.
  operator bool() const { return Kind != NoNearMiss; }

  NearMissKind getKind() const { return Kind; }

  // Feature flags required by the instruction, that the current target does
  // not have.
  uint64_t getFeatures() const {
    assert(Kind == NearMissFeature &&
           "getFeatures() is only valid for NearMissFeature");
    return Features;
  }
  // Error code returned by the target predicate when validating this
  // instruction encoding.
  unsigned getPredicateError() const {
    assert(Kind == NearMissPredicate &&
           "getPredicateError() is only valid for NearMissPredicate");
    return PredicateError;
  }
  // MatchClassKind of the operand that we expected to see.
  // For NearMissTooFewOperands this reads through MissedOperand; see the
  // common-prefix note on the payload structs below.
  unsigned getOperandClass() const {
    assert((Kind == NearMissOperand || Kind == NearMissTooFewOperands) &&
           "getOperandClass() requires an operand-related near miss");
    return MissedOperand.Class;
  }
  // Opcode of the encoding we were trying to match.
  // For NearMissTooFewOperands this reads through MissedOperand; see the
  // common-prefix note on the payload structs below.
  unsigned getOpcode() const {
    assert((Kind == NearMissOperand || Kind == NearMissTooFewOperands) &&
           "getOpcode() requires an operand-related near miss");
    return MissedOperand.Opcode;
  }
  // Error code returned when validating the operand.
  unsigned getOperandError() const {
    assert(Kind == NearMissOperand &&
           "getOperandError() is only valid for NearMissOperand");
    return MissedOperand.Error;
  }
  // Index of the actual operand we were trying to match in the list of parsed
  // operands.
  unsigned getOperandIndex() const {
    assert(Kind == NearMissOperand &&
           "getOperandIndex() is only valid for NearMissOperand");
    return MissedOperand.Index;
  }

private:
  NearMissKind Kind;

  // These two structs share a common prefix (Class, Opcode), so we can safely
  // rely on the fact that they overlap in the union: Class and Opcode may be
  // read through either member. Keep the leading fields of both structs
  // identical in type and order.
  struct MissedOpInfo {
    unsigned Class;
    unsigned Opcode;
    unsigned Error;
    unsigned Index;
  };

  struct TooFewOperandsInfo {
    unsigned Class;
    unsigned Opcode;
  };

  // Payload; the member that is meaningful is selected by Kind.
  union {
    uint64_t Features;
    unsigned PredicateError;
    MissedOpInfo MissedOperand;
    TooFewOperandsInfo TooFewOperands;
  };

  // Zero the payload through MissedOperand (the member with the most fields)
  // so that a freshly built object never carries indeterminate values in the
  // fields a factory does not set.
  NearMissInfo() : Kind(NoNearMiss), MissedOperand() {}
};
268
269/// MCTargetAsmParser - Generic interface to target specific assembly parsers.
270class MCTargetAsmParser : public MCAsmParserExtension {
271public:
272 enum MatchResultTy {
273 Match_InvalidOperand,
274 Match_InvalidTiedOperand,
275 Match_MissingFeature,
276 Match_MnemonicFail,
277 Match_Success,
278 Match_NearMisses,
279 FIRST_TARGET_MATCH_RESULT_TY
280 };
281
282protected: // Can only create subclasses.
283 MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI,
284 const MCInstrInfo &MII);
285
286 /// Create a copy of STI and return a non-const reference to it.
287 MCSubtargetInfo &copySTI();
288
289 /// AvailableFeatures - The current set of available features.
290 uint64_t AvailableFeatures = 0;
291
292 /// ParsingInlineAsm - Are we parsing ms-style inline assembly?
293 bool ParsingInlineAsm = false;
294
295 /// SemaCallback - The Sema callback implementation. Must be set when parsing
296 /// ms-style inline assembly.
297 MCAsmParserSemaCallback *SemaCallback;
298
299 /// Set of options which affects instrumentation of inline assembly.
300 MCTargetOptions MCOptions;
301
302 /// Current STI.
303 const MCSubtargetInfo *STI;
304
305 const MCInstrInfo &MII;
306
307public:
308 MCTargetAsmParser(const MCTargetAsmParser &) = delete;
309 MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete;
310
311 ~MCTargetAsmParser() override;
312
313 const MCSubtargetInfo &getSTI() const;
314
315 uint64_t getAvailableFeatures() const { return AvailableFeatures; }
316 void setAvailableFeatures(uint64_t Value) { AvailableFeatures = Value; }
317
318 bool isParsingInlineAsm () { return ParsingInlineAsm; }
319 void setParsingInlineAsm (bool Value) { ParsingInlineAsm = Value; }
320
321 MCTargetOptions getTargetOptions() const { return MCOptions; }
322
323 void setSemaCallback(MCAsmParserSemaCallback *Callback) {
324 SemaCallback = Callback;
325 }
326
327 virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
328 SMLoc &EndLoc) = 0;
329
330 /// Sets frame register corresponding to the current MachineFunction.
331 virtual void SetFrameRegister(unsigned RegNo) {}
332
333 /// ParseInstruction - Parse one assembly instruction.
334 ///
335 /// The parser is positioned following the instruction name. The target
336 /// specific instruction parser should parse the entire instruction and
337 /// construct the appropriate MCInst, or emit an error. On success, the entire
338 /// line should be parsed up to and including the end-of-statement token. On
339 /// failure, the parser is not required to read to the end of the line.
340 //
341 /// \param Name - The instruction name.
342 /// \param NameLoc - The source location of the name.
343 /// \param Operands [out] - The list of parsed operands, this returns
344 /// ownership of them to the caller.
345 /// \return True on failure.
346 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
347 SMLoc NameLoc, OperandVector &Operands) = 0;
348 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
349 AsmToken Token, OperandVector &Operands) {
350 return ParseInstruction(Info, Name, Token.getLoc(), Operands);
351 }
352
353 /// ParseDirective - Parse a target specific assembler directive
354 ///
355 /// The parser is positioned following the directive name. The target
356 /// specific directive parser should parse the entire directive doing or
357 /// recording any target specific work, or return true and do nothing if the
358 /// directive is not target specific. If the directive is specific for
359 /// the target, the entire line is parsed up to and including the
360 /// end-of-statement token and false is returned.
361 ///
362 /// \param DirectiveID - the identifier token of the directive.
363 virtual bool ParseDirective(AsmToken DirectiveID) = 0;
364
365 /// MatchAndEmitInstruction - Recognize a series of operands of a parsed
366 /// instruction as an actual MCInst and emit it to the specified MCStreamer.
367 /// This returns false on success and returns true on failure to match.
368 ///
369 /// On failure, the target parser is responsible for emitting a diagnostic
370 /// explaining the match failure.
371 virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
372 OperandVector &Operands, MCStreamer &Out,
373 uint64_t &ErrorInfo,
374 bool MatchingInlineAsm) = 0;
375
376 /// Allows targets to let registers opt out of clobber lists.
377 virtual bool OmitRegisterFromClobberLists(unsigned RegNo) { return false; }
378
379 /// Allow a target to add special case operand matching for things that
380 /// tblgen doesn't/can't handle effectively. For example, literal
381 /// immediates on ARM. TableGen expects a token operand, but the parser
382 /// will recognize them as immediates.
383 virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
384 unsigned Kind) {
385 return Match_InvalidOperand;
386 }
387
388 /// Validate the instruction match against any complex target predicates
389 /// before rendering any operands to it.
390 virtual unsigned
391 checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) {
392 return Match_Success;
393 }
394
395 /// checkTargetMatchPredicate - Validate the instruction match against
396 /// any complex target predicates not expressible via match classes.
397 virtual unsigned checkTargetMatchPredicate(MCInst &Inst) {
398 return Match_Success;
399 }
400
401 virtual void convertToMapAndConstraints(unsigned Kind,
402 const OperandVector &Operands) = 0;
403
404 // Return whether this parser uses assignment statements with equals tokens
405 virtual bool equalIsAsmAssignment() { return true; };
406 // Return whether this start of statement identifier is a label
407 virtual bool isLabel(AsmToken &Token) { return true; };
408 // Return whether this parser accept star as start of statement
409 virtual bool starIsStartOfStatement() { return false; };
410
411 virtual const MCExpr *applyModifierToExpr(const MCExpr *E,
412 MCSymbolRefExpr::VariantKind,
413 MCContext &Ctx) {
414 return nullptr;
415 }
416
417 virtual void onLabelParsed(MCSymbol *Symbol) {}
418
419 /// Ensure that all previously parsed instructions have been emitted to the
420 /// output streamer, if the target does not emit them immediately.
421 virtual void flushPendingInstructions(MCStreamer &Out) {}
422
423 virtual const MCExpr *createTargetUnaryExpr(const MCExpr *E,
424 AsmToken::TokenKind OperatorToken,
425 MCContext &Ctx) {
426 return nullptr;
427 }
428};
429
430} // end namespace llvm
431
432#endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H