blob: 8576c18ddcd818aca2c07a6fb78fef720688df91 [file] [log] [blame]
Andrew Walbran3d2c1972020-04-07 12:24:26 +01001//===-- ConstString.h -------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef liblldb_ConstString_h_
10#define liblldb_ConstString_h_
11
12#include "llvm/ADT/StringRef.h"
13#include "llvm/Support/FormatVariadic.h"
14
15#include <stddef.h>
16
// Forward declarations. This header only mentions these types through
// pointers and references (Stream * / Stream & / raw_ostream &), so the full
// definitions are not required here.
namespace lldb_private {
class Stream;
} // namespace lldb_private

namespace llvm {
class raw_ostream;
} // namespace llvm
23
24namespace lldb_private {
25
26/// \class ConstString ConstString.h "lldb/Utility/ConstString.h"
27/// A uniqued constant string class.
28///
29/// Provides an efficient way to store strings as uniqued strings. After the
30/// strings are uniqued, finding strings that are equal to one another is very
31/// fast as just the pointers need to be compared. It also allows for many
32/// common strings from many different sources to be shared to keep the memory
33/// footprint low.
34///
35/// No reference counting is done on strings that are added to the string
36/// pool, once strings are added they are in the string pool for the life of
37/// the program.
38class ConstString {
39public:
40 /// Default constructor
41 ///
42 /// Initializes the string to an empty string.
43 ConstString() : m_string(nullptr) {}
44
45 /// Copy constructor
46 ///
47 /// Copies the string value in \a rhs into this object.
48 ///
49 /// \param[in] rhs
50 /// Another string object to copy.
51 ConstString(const ConstString &rhs) : m_string(rhs.m_string) {}
52
53 explicit ConstString(const llvm::StringRef &s);
54
55 /// Construct with C String value
56 ///
57 /// Constructs this object with a C string by looking to see if the
58 /// C string already exists in the global string pool. If it doesn't
59 /// exist, it is added to the string pool.
60 ///
61 /// \param[in] cstr
62 /// A NULL terminated C string to add to the string pool.
63 explicit ConstString(const char *cstr);
64
65 /// Construct with C String value with max length
66 ///
67 /// Constructs this object with a C string with a length. If \a max_cstr_len
68 /// is greater than the actual length of the string, the string length will
69 /// be truncated. This allows substrings to be created without the need to
70 /// NULL terminate the string as it is passed into this function.
71 ///
72 /// \param[in] cstr
73 /// A pointer to the first character in the C string. The C
74 /// string can be NULL terminated in a buffer that contains
75 /// more characters than the length of the string, or the
76 /// string can be part of another string and a new substring
77 /// can be created.
78 ///
79 /// \param[in] max_cstr_len
80 /// The max length of \a cstr. If the string length of \a cstr
81 /// is less than \a max_cstr_len, then the string will be
82 /// truncated. If the string length of \a cstr is greater than
83 /// \a max_cstr_len, then only max_cstr_len bytes will be used
84 /// from \a cstr.
85 explicit ConstString(const char *cstr, size_t max_cstr_len);
86
87 /// Destructor
88 ///
89 /// Since constant string values are currently not reference counted, there
90 /// isn't much to do here.
91 ~ConstString() = default;
92
93 /// C string equality binary predicate function object for ConstString
94 /// objects.
95 struct StringIsEqual {
96 /// C equality test.
97 ///
98 /// Two C strings are equal when they are contained in ConstString objects
99 /// when their pointer values are equal to each other.
100 ///
101 /// \return
102 /// Returns \b true if the C string in \a lhs is equal to
103 /// the C string value in \a rhs, \b false otherwise.
104 bool operator()(const char *lhs, const char *rhs) const {
105 return lhs == rhs;
106 }
107 };
108
109 /// Convert to bool operator.
110 ///
111 /// This allows code to check a ConstString object to see if it contains a
112 /// valid string using code such as:
113 ///
114 /// \code
115 /// ConstString str(...);
116 /// if (str)
117 /// { ...
118 /// \endcode
119 ///
120 /// \return
121 /// /b True this object contains a valid non-empty C string, \b
122 /// false otherwise.
123 explicit operator bool() const { return !IsEmpty(); }
124
125 /// Assignment operator
126 ///
127 /// Assigns the string in this object with the value from \a rhs.
128 ///
129 /// \param[in] rhs
130 /// Another string object to copy into this object.
131 ///
132 /// \return
133 /// A const reference to this object.
134 ConstString operator=(ConstString rhs) {
135 m_string = rhs.m_string;
136 return *this;
137 }
138
139 /// Equal to operator
140 ///
141 /// Returns true if this string is equal to the string in \a rhs. This
142 /// operation is very fast as it results in a pointer comparison since all
143 /// strings are in a uniqued in a global string pool.
144 ///
145 /// \param[in] rhs
146 /// Another string object to compare this object to.
147 ///
148 /// \return
149 /// \li \b true if this object is equal to \a rhs.
150 /// \li \b false if this object is not equal to \a rhs.
151 bool operator==(ConstString rhs) const {
152 // We can do a pointer compare to compare these strings since they must
153 // come from the same pool in order to be equal.
154 return m_string == rhs.m_string;
155 }
156
157 /// Equal to operator against a non-ConstString value.
158 ///
159 /// Returns true if this string is equal to the string in \a rhs. This
160 /// overload is usually slower than comparing against a ConstString value.
161 /// However, if the rhs string not already a ConstString and it is impractical
162 /// to turn it into a non-temporary variable, then this overload is faster.
163 ///
164 /// \param[in] rhs
165 /// Another string object to compare this object to.
166 ///
167 /// \return
168 /// \li \b true if this object is equal to \a rhs.
169 /// \li \b false if this object is not equal to \a rhs.
170 bool operator==(const char *rhs) const {
171 // ConstString differentiates between empty strings and nullptr strings, but
172 // StringRef doesn't. Therefore we have to do this check manually now.
173 if (m_string == nullptr && rhs != nullptr)
174 return false;
175 if (m_string != nullptr && rhs == nullptr)
176 return false;
177
178 return GetStringRef() == rhs;
179 }
180
181 /// Not equal to operator
182 ///
183 /// Returns true if this string is not equal to the string in \a rhs. This
184 /// operation is very fast as it results in a pointer comparison since all
185 /// strings are in a uniqued in a global string pool.
186 ///
187 /// \param[in] rhs
188 /// Another string object to compare this object to.
189 ///
190 /// \return
191 /// \li \b true if this object is not equal to \a rhs.
192 /// \li \b false if this object is equal to \a rhs.
193 bool operator!=(ConstString rhs) const {
194 return m_string != rhs.m_string;
195 }
196
197 /// Not equal to operator against a non-ConstString value.
198 ///
199 /// Returns true if this string is not equal to the string in \a rhs. This
200 /// overload is usually slower than comparing against a ConstString value.
201 /// However, if the rhs string not already a ConstString and it is impractical
202 /// to turn it into a non-temporary variable, then this overload is faster.
203 ///
204 /// \param[in] rhs
205 /// Another string object to compare this object to.
206 ///
207 /// \return
208 /// \li \b true if this object is not equal to \a rhs.
209 /// \li \b false if this object is equal to \a rhs.
210 bool operator!=(const char *rhs) const { return !(*this == rhs); }
211
212 bool operator<(ConstString rhs) const;
213
214 /// Get the string value as a C string.
215 ///
216 /// Get the value of the contained string as a NULL terminated C string
217 /// value.
218 ///
219 /// If \a value_if_empty is nullptr, then nullptr will be returned.
220 ///
221 /// \return
222 /// Returns \a value_if_empty if the string is empty, otherwise
223 /// the C string value contained in this object.
224 const char *AsCString(const char *value_if_empty = nullptr) const {
225 return (IsEmpty() ? value_if_empty : m_string);
226 }
227
228 /// Get the string value as a llvm::StringRef
229 ///
230 /// \return
231 /// Returns a new llvm::StringRef object filled in with the
232 /// needed data.
233 llvm::StringRef GetStringRef() const {
234 return llvm::StringRef(m_string, GetLength());
235 }
236
237 /// Get the string value as a C string.
238 ///
239 /// Get the value of the contained string as a NULL terminated C string
240 /// value. Similar to the ConstString::AsCString() function, yet this
241 /// function will always return nullptr if the string is not valid. So this
242 /// function is a direct accessor to the string pointer value.
243 ///
244 /// \return
245 /// Returns nullptr the string is invalid, otherwise the C string
246 /// value contained in this object.
247 const char *GetCString() const { return m_string; }
248
249 /// Get the length in bytes of string value.
250 ///
251 /// The string pool stores the length of the string, so we can avoid calling
252 /// strlen() on the pointer value with this function.
253 ///
254 /// \return
255 /// Returns the number of bytes that this string occupies in
256 /// memory, not including the NULL termination byte.
257 size_t GetLength() const;
258
259 /// Clear this object's state.
260 ///
261 /// Clear any contained string and reset the value to the empty string
262 /// value.
263 void Clear() { m_string = nullptr; }
264
265 /// Equal to operator
266 ///
267 /// Returns true if this string is equal to the string in \a rhs. If case
268 /// sensitive equality is tested, this operation is very fast as it results
269 /// in a pointer comparison since all strings are in a uniqued in a global
270 /// string pool.
271 ///
272 /// \param[in] rhs
273 /// The Left Hand Side const ConstString object reference.
274 ///
275 /// \param[in] rhs
276 /// The Right Hand Side const ConstString object reference.
277 ///
278 /// \param[in] case_sensitive
279 /// Case sensitivity. If true, case sensitive equality
280 /// will be tested, otherwise character case will be ignored
281 ///
282 /// \return
283 /// \li \b true if this object is equal to \a rhs.
284 /// \li \b false if this object is not equal to \a rhs.
285 static bool Equals(ConstString lhs, ConstString rhs,
286 const bool case_sensitive = true);
287
288 /// Compare two string objects.
289 ///
290 /// Compares the C string values contained in \a lhs and \a rhs and returns
291 /// an integer result.
292 ///
293 /// NOTE: only call this function when you want a true string
294 /// comparison. If you want string equality use the, use the == operator as
295 /// it is much more efficient. Also if you want string inequality, use the
296 /// != operator for the same reasons.
297 ///
298 /// \param[in] lhs
299 /// The Left Hand Side const ConstString object reference.
300 ///
301 /// \param[in] rhs
302 /// The Right Hand Side const ConstString object reference.
303 ///
304 /// \param[in] case_sensitive
305 /// Case sensitivity of compare. If true, case sensitive compare
306 /// will be performed, otherwise character case will be ignored
307 ///
308 /// \return
309 /// \li -1 if lhs < rhs
310 /// \li 0 if lhs == rhs
311 /// \li 1 if lhs > rhs
312 static int Compare(ConstString lhs, ConstString rhs,
313 const bool case_sensitive = true);
314
315 /// Dump the object description to a stream.
316 ///
317 /// Dump the string value to the stream \a s. If the contained string is
318 /// empty, print \a value_if_empty to the stream instead. If \a
319 /// value_if_empty is nullptr, then nothing will be dumped to the stream.
320 ///
321 /// \param[in] s
322 /// The stream that will be used to dump the object description.
323 ///
324 /// \param[in] value_if_empty
325 /// The value to dump if the string is empty. If nullptr, nothing
326 /// will be output to the stream.
327 void Dump(Stream *s, const char *value_if_empty = nullptr) const;
328
329 /// Dump the object debug description to a stream.
330 ///
331 /// \param[in] s
332 /// The stream that will be used to dump the object description.
333 void DumpDebug(Stream *s) const;
334
335 /// Test for empty string.
336 ///
337 /// \return
338 /// \li \b true if the contained string is empty.
339 /// \li \b false if the contained string is not empty.
340 bool IsEmpty() const { return m_string == nullptr || m_string[0] == '\0'; }
341
342 /// Test for null string.
343 ///
344 /// \return
345 /// \li \b true if there is no string associated with this instance.
346 /// \li \b false if there is a string associated with this instance.
347 bool IsNull() const { return m_string == nullptr; }
348
349 /// Set the C string value.
350 ///
351 /// Set the string value in the object by uniquing the \a cstr string value
352 /// in our global string pool.
353 ///
354 /// If the C string already exists in the global string pool, it finds the
355 /// current entry and returns the existing value. If it doesn't exist, it is
356 /// added to the string pool.
357 ///
358 /// \param[in] cstr
359 /// A NULL terminated C string to add to the string pool.
360 void SetCString(const char *cstr);
361
362 void SetString(const llvm::StringRef &s);
363
364 /// Set the C string value and its mangled counterpart.
365 ///
366 /// Object files and debug symbols often use mangled string to represent the
367 /// linkage name for a symbol, function or global. The string pool can
368 /// efficiently store these values and their counterparts so when we run
369 /// into another instance of a mangled name, we can avoid calling the name
370 /// demangler over and over on the same strings and then trying to unique
371 /// them.
372 ///
373 /// \param[in] demangled
374 /// The demangled string to correlate with the \a mangled name.
375 ///
376 /// \param[in] mangled
377 /// The already uniqued mangled ConstString to correlate the
378 /// soon to be uniqued version of \a demangled.
379 void SetStringWithMangledCounterpart(llvm::StringRef demangled,
380 ConstString mangled);
381
382 /// Retrieve the mangled or demangled counterpart for a mangled or demangled
383 /// ConstString.
384 ///
385 /// Object files and debug symbols often use mangled string to represent the
386 /// linkage name for a symbol, function or global. The string pool can
387 /// efficiently store these values and their counterparts so when we run
388 /// into another instance of a mangled name, we can avoid calling the name
389 /// demangler over and over on the same strings and then trying to unique
390 /// them.
391 ///
392 /// \param[in] counterpart
393 /// A reference to a ConstString object that might get filled in
394 /// with the demangled/mangled counterpart.
395 ///
396 /// \return
397 /// /b True if \a counterpart was filled in with the counterpart
398 /// /b false otherwise.
399 bool GetMangledCounterpart(ConstString &counterpart) const;
400
401 /// Set the C string value with length.
402 ///
403 /// Set the string value in the object by uniquing \a cstr_len bytes
404 /// starting at the \a cstr string value in our global string pool. If trim
405 /// is true, then \a cstr_len indicates a maximum length of the CString and
406 /// if the actual length of the string is less, then it will be trimmed.
407 ///
408 /// If the C string already exists in the global string pool, it finds the
409 /// current entry and returns the existing value. If it doesn't exist, it is
410 /// added to the string pool.
411 ///
412 /// \param[in] cstr
413 /// A NULL terminated C string to add to the string pool.
414 ///
415 /// \param[in] cstr_len
416 /// The maximum length of the C string.
417 void SetCStringWithLength(const char *cstr, size_t cstr_len);
418
419 /// Set the C string value with the minimum length between \a fixed_cstr_len
420 /// and the actual length of the C string. This can be used for data
421 /// structures that have a fixed length to store a C string where the string
422 /// might not be NULL terminated if the string takes the entire buffer.
423 void SetTrimmedCStringWithLength(const char *cstr, size_t fixed_cstr_len);
424
425 /// Get the memory cost of this object.
426 ///
427 /// Return the size in bytes that this object takes in memory. This returns
428 /// the size in bytes of this object, which does not include any the shared
429 /// string values it may refer to.
430 ///
431 /// \return
432 /// The number of bytes that this object occupies in memory.
433 ///
434 /// \see ConstString::StaticMemorySize ()
435 size_t MemorySize() const { return sizeof(ConstString); }
436
437 /// Get the size in bytes of the current global string pool.
438 ///
439 /// Reports the size in bytes of all shared C string values, containers and
440 /// any other values as a byte size for the entire string pool.
441 ///
442 /// \return
443 /// The number of bytes that the global string pool occupies
444 /// in memory.
445 static size_t StaticMemorySize();
446
447protected:
448 // Member variables
449 const char *m_string;
450};
451
452/// Stream the string value \a str to the stream \a s
453Stream &operator<<(Stream &s, ConstString str);
454
455} // namespace lldb_private
456
457namespace llvm {
458template <> struct format_provider<lldb_private::ConstString> {
459 static void format(const lldb_private::ConstString &CS, llvm::raw_ostream &OS,
460 llvm::StringRef Options);
461};
462}
463
464#endif // liblldb_ConstString_h_