Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1 | //===- Twine.h - Fast Temporary String Concatenation ------------*- C++ -*-===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | |
| 10 | #ifndef LLVM_ADT_TWINE_H |
| 11 | #define LLVM_ADT_TWINE_H |
| 12 | |
| 13 | #include "llvm/ADT/SmallVector.h" |
| 14 | #include "llvm/ADT/StringRef.h" |
| 15 | #include "llvm/Support/ErrorHandling.h" |
| 16 | #include <cassert> |
| 17 | #include <cstdint> |
| 18 | #include <string> |
| 19 | |
| 20 | namespace llvm { |
| 21 | |
| 22 | class formatv_object_base; |
| 23 | class raw_ostream; |
| 24 | |
| 25 | /// Twine - A lightweight data structure for efficiently representing the |
| 26 | /// concatenation of temporary values as strings. |
| 27 | /// |
| 28 | /// A Twine is a kind of rope, it represents a concatenated string using a |
| 29 | /// binary-tree, where the string is the preorder of the nodes. Since the |
| 30 | /// Twine can be efficiently rendered into a buffer when its result is used, |
| 31 | /// it avoids the cost of generating temporary values for intermediate string |
| 32 | /// results -- particularly in cases when the Twine result is never |
| 33 | /// required. By explicitly tracking the type of leaf nodes, we can also avoid |
| 34 | /// the creation of temporary strings for conversions operations (such as |
| 35 | /// appending an integer to a string). |
| 36 | /// |
| 37 | /// A Twine is not intended for use directly and should not be stored, its |
| 38 | /// implementation relies on the ability to store pointers to temporary stack |
| 39 | /// objects which may be deallocated at the end of a statement. Twines should |
| 40 | /// only be accepted as const references in arguments, when an API wishes |
| 41 | /// to accept possibly-concatenated strings. |
| 42 | /// |
| 43 | /// Twines support a special 'null' value, which always concatenates to form |
| 44 | /// itself, and renders as an empty string. This can be returned from APIs to |
| 45 | /// effectively nullify any concatenations performed on the result. |
| 46 | /// |
| 47 | /// \b Implementation |
| 48 | /// |
| 49 | /// Given the nature of a Twine, it is not possible for the Twine's |
| 50 | /// concatenation method to construct interior nodes; the result must be |
| 51 | /// represented inside the returned value. For this reason a Twine object |
| 52 | /// actually holds two values, the left- and right-hand sides of a |
| 53 | /// concatenation. We also have nullary Twine objects, which are effectively |
| 54 | /// sentinel values that represent empty strings. |
| 55 | /// |
| 56 | /// Thus, a Twine can effectively have zero, one, or two children. The \see |
| 57 | /// isNullary(), \see isUnary(), and \see isBinary() predicates exist for |
| 58 | /// testing the number of children. |
| 59 | /// |
| 60 | /// We maintain a number of invariants on Twine objects (FIXME: Why): |
| 61 | /// - Nullary twines are always represented with their Kind on the left-hand |
| 62 | /// side, and the Empty kind on the right-hand side. |
| 63 | /// - Unary twines are always represented with the value on the left-hand |
| 64 | /// side, and the Empty kind on the right-hand side. |
| 65 | /// - If a Twine has another Twine as a child, that child should always be |
| 66 | /// binary (otherwise it could have been folded into the parent). |
| 67 | /// |
| 68 | /// These invariants are checked by \see isValid(). |
| 69 | /// |
| 70 | /// \b Efficiency Considerations |
| 71 | /// |
| 72 | /// The Twine is designed to yield efficient and small code for common |
| 73 | /// situations. For this reason, the concat() method is inlined so that |
| 74 | /// concatenations of leaf nodes can be optimized into stores directly into a |
| 75 | /// single stack allocated object. |
| 76 | /// |
| 77 | /// In practice, not all compilers can be trusted to optimize concat() fully, |
| 78 | /// so we provide two additional methods (and accompanying operator+ |
| 79 | /// overloads) to guarantee that particularly important cases (cstring plus |
| 80 | /// StringRef) codegen as desired. |
| 81 | class Twine { |
| 82 | /// NodeKind - Represent the type of an argument. |
| 83 | enum NodeKind : unsigned char { |
| 84 | /// An empty string; the result of concatenating anything with it is also |
| 85 | /// empty. |
| 86 | NullKind, |
| 87 | |
| 88 | /// The empty string. |
| 89 | EmptyKind, |
| 90 | |
| 91 | /// A pointer to a Twine instance. |
| 92 | TwineKind, |
| 93 | |
| 94 | /// A pointer to a C string instance. |
| 95 | CStringKind, |
| 96 | |
| 97 | /// A pointer to an std::string instance. |
| 98 | StdStringKind, |
| 99 | |
| 100 | /// A pointer to a StringRef instance. |
| 101 | StringRefKind, |
| 102 | |
| 103 | /// A pointer to a SmallString instance. |
| 104 | SmallStringKind, |
| 105 | |
| 106 | /// A pointer to a formatv_object_base instance. |
| 107 | FormatvObjectKind, |
| 108 | |
| 109 | /// A char value, to render as a character. |
| 110 | CharKind, |
| 111 | |
| 112 | /// An unsigned int value, to render as an unsigned decimal integer. |
| 113 | DecUIKind, |
| 114 | |
| 115 | /// An int value, to render as a signed decimal integer. |
| 116 | DecIKind, |
| 117 | |
| 118 | /// A pointer to an unsigned long value, to render as an unsigned decimal |
| 119 | /// integer. |
| 120 | DecULKind, |
| 121 | |
| 122 | /// A pointer to a long value, to render as a signed decimal integer. |
| 123 | DecLKind, |
| 124 | |
| 125 | /// A pointer to an unsigned long long value, to render as an unsigned |
| 126 | /// decimal integer. |
| 127 | DecULLKind, |
| 128 | |
| 129 | /// A pointer to a long long value, to render as a signed decimal integer. |
| 130 | DecLLKind, |
| 131 | |
| 132 | /// A pointer to a uint64_t value, to render as an unsigned hexadecimal |
| 133 | /// integer. |
| 134 | UHexKind |
| 135 | }; |
| 136 | |
| 137 | union Child |
| 138 | { |
| 139 | const Twine *twine; |
| 140 | const char *cString; |
| 141 | const std::string *stdString; |
| 142 | const StringRef *stringRef; |
| 143 | const SmallVectorImpl<char> *smallString; |
| 144 | const formatv_object_base *formatvObject; |
| 145 | char character; |
| 146 | unsigned int decUI; |
| 147 | int decI; |
| 148 | const unsigned long *decUL; |
| 149 | const long *decL; |
| 150 | const unsigned long long *decULL; |
| 151 | const long long *decLL; |
| 152 | const uint64_t *uHex; |
| 153 | }; |
| 154 | |
| 155 | /// LHS - The prefix in the concatenation, which may be uninitialized for |
| 156 | /// Null or Empty kinds. |
| 157 | Child LHS; |
| 158 | |
| 159 | /// RHS - The suffix in the concatenation, which may be uninitialized for |
| 160 | /// Null or Empty kinds. |
| 161 | Child RHS; |
| 162 | |
| 163 | /// LHSKind - The NodeKind of the left hand side, \see getLHSKind(). |
| 164 | NodeKind LHSKind = EmptyKind; |
| 165 | |
| 166 | /// RHSKind - The NodeKind of the right hand side, \see getRHSKind(). |
| 167 | NodeKind RHSKind = EmptyKind; |
| 168 | |
| 169 | /// Construct a nullary twine; the kind must be NullKind or EmptyKind. |
| 170 | explicit Twine(NodeKind Kind) : LHSKind(Kind) { |
| 171 | assert(isNullary() && "Invalid kind!"); |
| 172 | } |
| 173 | |
| 174 | /// Construct a binary twine. |
| 175 | explicit Twine(const Twine &LHS, const Twine &RHS) |
| 176 | : LHSKind(TwineKind), RHSKind(TwineKind) { |
| 177 | this->LHS.twine = &LHS; |
| 178 | this->RHS.twine = &RHS; |
| 179 | assert(isValid() && "Invalid twine!"); |
| 180 | } |
| 181 | |
| 182 | /// Construct a twine from explicit values. |
| 183 | explicit Twine(Child LHS, NodeKind LHSKind, Child RHS, NodeKind RHSKind) |
| 184 | : LHS(LHS), RHS(RHS), LHSKind(LHSKind), RHSKind(RHSKind) { |
| 185 | assert(isValid() && "Invalid twine!"); |
| 186 | } |
| 187 | |
| 188 | /// Check for the null twine. |
| 189 | bool isNull() const { |
| 190 | return getLHSKind() == NullKind; |
| 191 | } |
| 192 | |
| 193 | /// Check for the empty twine. |
| 194 | bool isEmpty() const { |
| 195 | return getLHSKind() == EmptyKind; |
| 196 | } |
| 197 | |
| 198 | /// Check if this is a nullary twine (null or empty). |
| 199 | bool isNullary() const { |
| 200 | return isNull() || isEmpty(); |
| 201 | } |
| 202 | |
| 203 | /// Check if this is a unary twine. |
| 204 | bool isUnary() const { |
| 205 | return getRHSKind() == EmptyKind && !isNullary(); |
| 206 | } |
| 207 | |
| 208 | /// Check if this is a binary twine. |
| 209 | bool isBinary() const { |
| 210 | return getLHSKind() != NullKind && getRHSKind() != EmptyKind; |
| 211 | } |
| 212 | |
| 213 | /// Check if this is a valid twine (satisfying the invariants on |
| 214 | /// order and number of arguments). |
| 215 | bool isValid() const { |
| 216 | // Nullary twines always have Empty on the RHS. |
| 217 | if (isNullary() && getRHSKind() != EmptyKind) |
| 218 | return false; |
| 219 | |
| 220 | // Null should never appear on the RHS. |
| 221 | if (getRHSKind() == NullKind) |
| 222 | return false; |
| 223 | |
| 224 | // The RHS cannot be non-empty if the LHS is empty. |
| 225 | if (getRHSKind() != EmptyKind && getLHSKind() == EmptyKind) |
| 226 | return false; |
| 227 | |
| 228 | // A twine child should always be binary. |
| 229 | if (getLHSKind() == TwineKind && |
| 230 | !LHS.twine->isBinary()) |
| 231 | return false; |
| 232 | if (getRHSKind() == TwineKind && |
| 233 | !RHS.twine->isBinary()) |
| 234 | return false; |
| 235 | |
| 236 | return true; |
| 237 | } |
| 238 | |
| 239 | /// Get the NodeKind of the left-hand side. |
| 240 | NodeKind getLHSKind() const { return LHSKind; } |
| 241 | |
| 242 | /// Get the NodeKind of the right-hand side. |
| 243 | NodeKind getRHSKind() const { return RHSKind; } |
| 244 | |
| 245 | /// Print one child from a twine. |
| 246 | void printOneChild(raw_ostream &OS, Child Ptr, NodeKind Kind) const; |
| 247 | |
| 248 | /// Print the representation of one child from a twine. |
| 249 | void printOneChildRepr(raw_ostream &OS, Child Ptr, |
| 250 | NodeKind Kind) const; |
| 251 | |
| 252 | public: |
| 253 | /// @name Constructors |
| 254 | /// @{ |
| 255 | |
| 256 | /// Construct from an empty string. |
| 257 | /*implicit*/ Twine() { |
| 258 | assert(isValid() && "Invalid twine!"); |
| 259 | } |
| 260 | |
| 261 | Twine(const Twine &) = default; |
| 262 | |
| 263 | /// Construct from a C string. |
| 264 | /// |
| 265 | /// We take care here to optimize "" into the empty twine -- this will be |
| 266 | /// optimized out for string constants. This allows Twine arguments have |
| 267 | /// default "" values, without introducing unnecessary string constants. |
| 268 | /*implicit*/ Twine(const char *Str) { |
| 269 | if (Str[0] != '\0') { |
| 270 | LHS.cString = Str; |
| 271 | LHSKind = CStringKind; |
| 272 | } else |
| 273 | LHSKind = EmptyKind; |
| 274 | |
| 275 | assert(isValid() && "Invalid twine!"); |
| 276 | } |
| 277 | |
| 278 | /// Construct from an std::string. |
| 279 | /*implicit*/ Twine(const std::string &Str) : LHSKind(StdStringKind) { |
| 280 | LHS.stdString = &Str; |
| 281 | assert(isValid() && "Invalid twine!"); |
| 282 | } |
| 283 | |
| 284 | /// Construct from a StringRef. |
| 285 | /*implicit*/ Twine(const StringRef &Str) : LHSKind(StringRefKind) { |
| 286 | LHS.stringRef = &Str; |
| 287 | assert(isValid() && "Invalid twine!"); |
| 288 | } |
| 289 | |
| 290 | /// Construct from a SmallString. |
| 291 | /*implicit*/ Twine(const SmallVectorImpl<char> &Str) |
| 292 | : LHSKind(SmallStringKind) { |
| 293 | LHS.smallString = &Str; |
| 294 | assert(isValid() && "Invalid twine!"); |
| 295 | } |
| 296 | |
| 297 | /// Construct from a formatv_object_base. |
| 298 | /*implicit*/ Twine(const formatv_object_base &Fmt) |
| 299 | : LHSKind(FormatvObjectKind) { |
| 300 | LHS.formatvObject = &Fmt; |
| 301 | assert(isValid() && "Invalid twine!"); |
| 302 | } |
| 303 | |
| 304 | /// Construct from a char. |
| 305 | explicit Twine(char Val) : LHSKind(CharKind) { |
| 306 | LHS.character = Val; |
| 307 | } |
| 308 | |
| 309 | /// Construct from a signed char. |
| 310 | explicit Twine(signed char Val) : LHSKind(CharKind) { |
| 311 | LHS.character = static_cast<char>(Val); |
| 312 | } |
| 313 | |
| 314 | /// Construct from an unsigned char. |
| 315 | explicit Twine(unsigned char Val) : LHSKind(CharKind) { |
| 316 | LHS.character = static_cast<char>(Val); |
| 317 | } |
| 318 | |
| 319 | /// Construct a twine to print \p Val as an unsigned decimal integer. |
| 320 | explicit Twine(unsigned Val) : LHSKind(DecUIKind) { |
| 321 | LHS.decUI = Val; |
| 322 | } |
| 323 | |
| 324 | /// Construct a twine to print \p Val as a signed decimal integer. |
| 325 | explicit Twine(int Val) : LHSKind(DecIKind) { |
| 326 | LHS.decI = Val; |
| 327 | } |
| 328 | |
| 329 | /// Construct a twine to print \p Val as an unsigned decimal integer. |
| 330 | explicit Twine(const unsigned long &Val) : LHSKind(DecULKind) { |
| 331 | LHS.decUL = &Val; |
| 332 | } |
| 333 | |
| 334 | /// Construct a twine to print \p Val as a signed decimal integer. |
| 335 | explicit Twine(const long &Val) : LHSKind(DecLKind) { |
| 336 | LHS.decL = &Val; |
| 337 | } |
| 338 | |
| 339 | /// Construct a twine to print \p Val as an unsigned decimal integer. |
| 340 | explicit Twine(const unsigned long long &Val) : LHSKind(DecULLKind) { |
| 341 | LHS.decULL = &Val; |
| 342 | } |
| 343 | |
| 344 | /// Construct a twine to print \p Val as a signed decimal integer. |
| 345 | explicit Twine(const long long &Val) : LHSKind(DecLLKind) { |
| 346 | LHS.decLL = &Val; |
| 347 | } |
| 348 | |
| 349 | // FIXME: Unfortunately, to make sure this is as efficient as possible we |
| 350 | // need extra binary constructors from particular types. We can't rely on |
| 351 | // the compiler to be smart enough to fold operator+()/concat() down to the |
| 352 | // right thing. Yet. |
| 353 | |
| 354 | /// Construct as the concatenation of a C string and a StringRef. |
| 355 | /*implicit*/ Twine(const char *LHS, const StringRef &RHS) |
| 356 | : LHSKind(CStringKind), RHSKind(StringRefKind) { |
| 357 | this->LHS.cString = LHS; |
| 358 | this->RHS.stringRef = &RHS; |
| 359 | assert(isValid() && "Invalid twine!"); |
| 360 | } |
| 361 | |
| 362 | /// Construct as the concatenation of a StringRef and a C string. |
| 363 | /*implicit*/ Twine(const StringRef &LHS, const char *RHS) |
| 364 | : LHSKind(StringRefKind), RHSKind(CStringKind) { |
| 365 | this->LHS.stringRef = &LHS; |
| 366 | this->RHS.cString = RHS; |
| 367 | assert(isValid() && "Invalid twine!"); |
| 368 | } |
| 369 | |
| 370 | /// Since the intended use of twines is as temporary objects, assignments |
| 371 | /// when concatenating might cause undefined behavior or stack corruptions |
| 372 | Twine &operator=(const Twine &) = delete; |
| 373 | |
| 374 | /// Create a 'null' string, which is an empty string that always |
| 375 | /// concatenates to form another empty string. |
| 376 | static Twine createNull() { |
| 377 | return Twine(NullKind); |
| 378 | } |
| 379 | |
| 380 | /// @} |
| 381 | /// @name Numeric Conversions |
| 382 | /// @{ |
| 383 | |
| 384 | // Construct a twine to print \p Val as an unsigned hexadecimal integer. |
| 385 | static Twine utohexstr(const uint64_t &Val) { |
| 386 | Child LHS, RHS; |
| 387 | LHS.uHex = &Val; |
| 388 | RHS.twine = nullptr; |
| 389 | return Twine(LHS, UHexKind, RHS, EmptyKind); |
| 390 | } |
| 391 | |
| 392 | /// @} |
| 393 | /// @name Predicate Operations |
| 394 | /// @{ |
| 395 | |
| 396 | /// Check if this twine is trivially empty; a false return value does not |
| 397 | /// necessarily mean the twine is empty. |
| 398 | bool isTriviallyEmpty() const { |
| 399 | return isNullary(); |
| 400 | } |
| 401 | |
| 402 | /// Return true if this twine can be dynamically accessed as a single |
| 403 | /// StringRef value with getSingleStringRef(). |
| 404 | bool isSingleStringRef() const { |
| 405 | if (getRHSKind() != EmptyKind) return false; |
| 406 | |
| 407 | switch (getLHSKind()) { |
| 408 | case EmptyKind: |
| 409 | case CStringKind: |
| 410 | case StdStringKind: |
| 411 | case StringRefKind: |
| 412 | case SmallStringKind: |
| 413 | return true; |
| 414 | default: |
| 415 | return false; |
| 416 | } |
| 417 | } |
| 418 | |
| 419 | /// @} |
| 420 | /// @name String Operations |
| 421 | /// @{ |
| 422 | |
| 423 | Twine concat(const Twine &Suffix) const; |
| 424 | |
| 425 | /// @} |
| 426 | /// @name Output & Conversion. |
| 427 | /// @{ |
| 428 | |
| 429 | /// Return the twine contents as a std::string. |
| 430 | std::string str() const; |
| 431 | |
| 432 | /// Append the concatenated string into the given SmallString or SmallVector. |
| 433 | void toVector(SmallVectorImpl<char> &Out) const; |
| 434 | |
| 435 | /// This returns the twine as a single StringRef. This method is only valid |
| 436 | /// if isSingleStringRef() is true. |
| 437 | StringRef getSingleStringRef() const { |
| 438 | assert(isSingleStringRef() &&"This cannot be had as a single stringref!"); |
| 439 | switch (getLHSKind()) { |
| 440 | default: llvm_unreachable("Out of sync with isSingleStringRef"); |
| 441 | case EmptyKind: return StringRef(); |
| 442 | case CStringKind: return StringRef(LHS.cString); |
| 443 | case StdStringKind: return StringRef(*LHS.stdString); |
| 444 | case StringRefKind: return *LHS.stringRef; |
| 445 | case SmallStringKind: |
| 446 | return StringRef(LHS.smallString->data(), LHS.smallString->size()); |
| 447 | } |
| 448 | } |
| 449 | |
| 450 | /// This returns the twine as a single StringRef if it can be |
| 451 | /// represented as such. Otherwise the twine is written into the given |
| 452 | /// SmallVector and a StringRef to the SmallVector's data is returned. |
| 453 | StringRef toStringRef(SmallVectorImpl<char> &Out) const { |
| 454 | if (isSingleStringRef()) |
| 455 | return getSingleStringRef(); |
| 456 | toVector(Out); |
| 457 | return StringRef(Out.data(), Out.size()); |
| 458 | } |
| 459 | |
| 460 | /// This returns the twine as a single null terminated StringRef if it |
| 461 | /// can be represented as such. Otherwise the twine is written into the |
| 462 | /// given SmallVector and a StringRef to the SmallVector's data is returned. |
| 463 | /// |
| 464 | /// The returned StringRef's size does not include the null terminator. |
| 465 | StringRef toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const; |
| 466 | |
| 467 | /// Write the concatenated string represented by this twine to the |
| 468 | /// stream \p OS. |
| 469 | void print(raw_ostream &OS) const; |
| 470 | |
| 471 | /// Dump the concatenated string represented by this twine to stderr. |
| 472 | void dump() const; |
| 473 | |
| 474 | /// Write the representation of this twine to the stream \p OS. |
| 475 | void printRepr(raw_ostream &OS) const; |
| 476 | |
| 477 | /// Dump the representation of this twine to stderr. |
| 478 | void dumpRepr() const; |
| 479 | |
| 480 | /// @} |
| 481 | }; |
| 482 | |
| 483 | /// @name Twine Inline Implementations |
| 484 | /// @{ |
| 485 | |
| 486 | inline Twine Twine::concat(const Twine &Suffix) const { |
| 487 | // Concatenation with null is null. |
| 488 | if (isNull() || Suffix.isNull()) |
| 489 | return Twine(NullKind); |
| 490 | |
| 491 | // Concatenation with empty yields the other side. |
| 492 | if (isEmpty()) |
| 493 | return Suffix; |
| 494 | if (Suffix.isEmpty()) |
| 495 | return *this; |
| 496 | |
| 497 | // Otherwise we need to create a new node, taking care to fold in unary |
| 498 | // twines. |
| 499 | Child NewLHS, NewRHS; |
| 500 | NewLHS.twine = this; |
| 501 | NewRHS.twine = &Suffix; |
| 502 | NodeKind NewLHSKind = TwineKind, NewRHSKind = TwineKind; |
| 503 | if (isUnary()) { |
| 504 | NewLHS = LHS; |
| 505 | NewLHSKind = getLHSKind(); |
| 506 | } |
| 507 | if (Suffix.isUnary()) { |
| 508 | NewRHS = Suffix.LHS; |
| 509 | NewRHSKind = Suffix.getLHSKind(); |
| 510 | } |
| 511 | |
| 512 | return Twine(NewLHS, NewLHSKind, NewRHS, NewRHSKind); |
| 513 | } |
| 514 | |
| 515 | inline Twine operator+(const Twine &LHS, const Twine &RHS) { |
| 516 | return LHS.concat(RHS); |
| 517 | } |
| 518 | |
| 519 | /// Additional overload to guarantee simplified codegen; this is equivalent to |
| 520 | /// concat(). |
| 521 | |
| 522 | inline Twine operator+(const char *LHS, const StringRef &RHS) { |
| 523 | return Twine(LHS, RHS); |
| 524 | } |
| 525 | |
| 526 | /// Additional overload to guarantee simplified codegen; this is equivalent to |
| 527 | /// concat(). |
| 528 | |
| 529 | inline Twine operator+(const StringRef &LHS, const char *RHS) { |
| 530 | return Twine(LHS, RHS); |
| 531 | } |
| 532 | |
| 533 | inline raw_ostream &operator<<(raw_ostream &OS, const Twine &RHS) { |
| 534 | RHS.print(OS); |
| 535 | return OS; |
| 536 | } |
| 537 | |
| 538 | /// @} |
| 539 | |
| 540 | } // end namespace llvm |
| 541 | |
| 542 | #endif // LLVM_ADT_TWINE_H |