Olivier Deprez | f4ef2d0 | 2021-04-20 13:36:24 +0200 | [diff] [blame] | 1 | //===--- LoopConvertUtils.h - clang-tidy ------------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOP_CONVERT_UTILS_H |
| 10 | #define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOP_CONVERT_UTILS_H |
| 11 | |
| 12 | #include "clang/AST/ASTContext.h" |
| 13 | #include "clang/AST/RecursiveASTVisitor.h" |
| 14 | #include "clang/ASTMatchers/ASTMatchFinder.h" |
| 15 | #include "clang/Basic/SourceLocation.h" |
| 16 | #include "llvm/ADT/DenseMap.h" |
| 17 | #include "llvm/ADT/SmallSet.h" |
| 18 | #include "llvm/ADT/SmallVector.h" |
| 19 | #include "llvm/ADT/StringRef.h" |
| 20 | #include <algorithm> |
| 21 | #include <memory> |
| 22 | #include <string> |
| 23 | #include <utility> |
| 24 | |
| 25 | namespace clang { |
| 26 | namespace tidy { |
| 27 | namespace modernize { |
| 28 | |
| 29 | enum LoopFixerKind { |
| 30 | LFK_Array, |
| 31 | LFK_Iterator, |
| 32 | LFK_ReverseIterator, |
| 33 | LFK_PseudoArray |
| 34 | }; |
| 35 | |
| 36 | /// A map used to walk the AST in reverse: maps child Stmt to parent Stmt. |
| 37 | typedef llvm::DenseMap<const clang::Stmt *, const clang::Stmt *> StmtParentMap; |
| 38 | |
| 39 | /// A map used to walk the AST in reverse: |
| 40 | /// maps VarDecl to the to parent DeclStmt. |
| 41 | typedef llvm::DenseMap<const clang::VarDecl *, const clang::DeclStmt *> |
| 42 | DeclParentMap; |
| 43 | |
| 44 | /// A map used to track which variables have been removed by a refactoring pass. |
| 45 | /// It maps the parent ForStmt to the removed index variable's VarDecl. |
| 46 | typedef llvm::DenseMap<const clang::ForStmt *, const clang::VarDecl *> |
| 47 | ReplacedVarsMap; |
| 48 | |
| 49 | /// A map used to remember the variable names generated in a Stmt |
| 50 | typedef llvm::DenseMap<const clang::Stmt *, std::string> |
| 51 | StmtGeneratedVarNameMap; |
| 52 | |
| 53 | /// A vector used to store the AST subtrees of an Expr. |
| 54 | typedef llvm::SmallVector<const clang::Expr *, 16> ComponentVector; |
| 55 | |
| 56 | /// Class used build the reverse AST properties needed to detect |
| 57 | /// name conflicts and free variables. |
| 58 | class StmtAncestorASTVisitor |
| 59 | : public clang::RecursiveASTVisitor<StmtAncestorASTVisitor> { |
| 60 | public: |
| 61 | StmtAncestorASTVisitor() { StmtStack.push_back(nullptr); } |
| 62 | |
| 63 | /// Run the analysis on the AST. |
| 64 | /// |
| 65 | /// In case we're running this analysis multiple times, don't repeat the work. |
| 66 | void gatherAncestors(ASTContext &Ctx) { |
| 67 | if (StmtAncestors.empty()) |
| 68 | TraverseAST(Ctx); |
| 69 | } |
| 70 | |
| 71 | /// Accessor for StmtAncestors. |
| 72 | const StmtParentMap &getStmtToParentStmtMap() { return StmtAncestors; } |
| 73 | |
| 74 | /// Accessor for DeclParents. |
| 75 | const DeclParentMap &getDeclToParentStmtMap() { return DeclParents; } |
| 76 | |
| 77 | friend class clang::RecursiveASTVisitor<StmtAncestorASTVisitor>; |
| 78 | |
| 79 | private: |
| 80 | StmtParentMap StmtAncestors; |
| 81 | DeclParentMap DeclParents; |
| 82 | llvm::SmallVector<const clang::Stmt *, 16> StmtStack; |
| 83 | |
| 84 | bool TraverseStmt(clang::Stmt *Statement); |
| 85 | bool VisitDeclStmt(clang::DeclStmt *Statement); |
| 86 | }; |
| 87 | |
| 88 | /// Class used to find the variables and member expressions on which an |
| 89 | /// arbitrary expression depends. |
| 90 | class ComponentFinderASTVisitor |
| 91 | : public clang::RecursiveASTVisitor<ComponentFinderASTVisitor> { |
| 92 | public: |
| 93 | ComponentFinderASTVisitor() = default; |
| 94 | |
| 95 | /// Find the components of an expression and place them in a ComponentVector. |
| 96 | void findExprComponents(const clang::Expr *SourceExpr) { |
| 97 | TraverseStmt(const_cast<clang::Expr *>(SourceExpr)); |
| 98 | } |
| 99 | |
| 100 | /// Accessor for Components. |
| 101 | const ComponentVector &getComponents() { return Components; } |
| 102 | |
| 103 | friend class clang::RecursiveASTVisitor<ComponentFinderASTVisitor>; |
| 104 | |
| 105 | private: |
| 106 | ComponentVector Components; |
| 107 | |
| 108 | bool VisitDeclRefExpr(clang::DeclRefExpr *E); |
| 109 | bool VisitMemberExpr(clang::MemberExpr *Member); |
| 110 | }; |
| 111 | |
| 112 | /// Class used to determine if an expression is dependent on a variable declared |
| 113 | /// inside of the loop where it would be used. |
| 114 | class DependencyFinderASTVisitor |
| 115 | : public clang::RecursiveASTVisitor<DependencyFinderASTVisitor> { |
| 116 | public: |
| 117 | DependencyFinderASTVisitor(const StmtParentMap *StmtParents, |
| 118 | const DeclParentMap *DeclParents, |
| 119 | const ReplacedVarsMap *ReplacedVars, |
| 120 | const clang::Stmt *ContainingStmt) |
| 121 | : StmtParents(StmtParents), DeclParents(DeclParents), |
| 122 | ContainingStmt(ContainingStmt), ReplacedVars(ReplacedVars) {} |
| 123 | |
| 124 | /// Run the analysis on Body, and return true iff the expression |
| 125 | /// depends on some variable declared within ContainingStmt. |
| 126 | /// |
| 127 | /// This is intended to protect against hoisting the container expression |
| 128 | /// outside of an inner context if part of that expression is declared in that |
| 129 | /// inner context. |
| 130 | /// |
| 131 | /// For example, |
| 132 | /// \code |
| 133 | /// const int N = 10, M = 20; |
| 134 | /// int arr[N][M]; |
| 135 | /// int getRow(); |
| 136 | /// |
| 137 | /// for (int i = 0; i < M; ++i) { |
| 138 | /// int k = getRow(); |
| 139 | /// printf("%d:", arr[k][i]); |
| 140 | /// } |
| 141 | /// \endcode |
| 142 | /// At first glance, this loop looks like it could be changed to |
| 143 | /// \code |
| 144 | /// for (int elem : arr[k]) { |
| 145 | /// int k = getIndex(); |
| 146 | /// printf("%d:", elem); |
| 147 | /// } |
| 148 | /// \endcode |
| 149 | /// But this is malformed, since `k` is used before it is defined! |
| 150 | /// |
| 151 | /// In order to avoid this, this class looks at the container expression |
| 152 | /// `arr[k]` and decides whether or not it contains a sub-expression declared |
| 153 | /// within the loop body. |
| 154 | bool dependsOnInsideVariable(const clang::Stmt *Body) { |
| 155 | DependsOnInsideVariable = false; |
| 156 | TraverseStmt(const_cast<clang::Stmt *>(Body)); |
| 157 | return DependsOnInsideVariable; |
| 158 | } |
| 159 | |
| 160 | friend class clang::RecursiveASTVisitor<DependencyFinderASTVisitor>; |
| 161 | |
| 162 | private: |
| 163 | const StmtParentMap *StmtParents; |
| 164 | const DeclParentMap *DeclParents; |
| 165 | const clang::Stmt *ContainingStmt; |
| 166 | const ReplacedVarsMap *ReplacedVars; |
| 167 | bool DependsOnInsideVariable; |
| 168 | |
| 169 | bool VisitVarDecl(clang::VarDecl *V); |
| 170 | bool VisitDeclRefExpr(clang::DeclRefExpr *D); |
| 171 | }; |
| 172 | |
| 173 | /// Class used to determine if any declarations used in a Stmt would conflict |
| 174 | /// with a particular identifier. This search includes the names that don't |
| 175 | /// actually appear in the AST (i.e. created by a refactoring tool) by including |
| 176 | /// a map from Stmts to generated names associated with those stmts. |
| 177 | class DeclFinderASTVisitor |
| 178 | : public clang::RecursiveASTVisitor<DeclFinderASTVisitor> { |
| 179 | public: |
| 180 | DeclFinderASTVisitor(const std::string &Name, |
| 181 | const StmtGeneratedVarNameMap *GeneratedDecls) |
| 182 | : Name(Name), GeneratedDecls(GeneratedDecls), Found(false) {} |
| 183 | |
| 184 | /// Attempts to find any usages of variables name Name in Body, returning |
| 185 | /// true when it is used in Body. This includes the generated loop variables |
| 186 | /// of ForStmts which have already been transformed. |
| 187 | bool findUsages(const clang::Stmt *Body) { |
| 188 | Found = false; |
| 189 | TraverseStmt(const_cast<clang::Stmt *>(Body)); |
| 190 | return Found; |
| 191 | } |
| 192 | |
| 193 | friend class clang::RecursiveASTVisitor<DeclFinderASTVisitor>; |
| 194 | |
| 195 | private: |
| 196 | std::string Name; |
| 197 | /// GeneratedDecls keeps track of ForStmts which have been transformed, |
| 198 | /// mapping each modified ForStmt to the variable generated in the loop. |
| 199 | const StmtGeneratedVarNameMap *GeneratedDecls; |
| 200 | bool Found; |
| 201 | |
| 202 | bool VisitForStmt(clang::ForStmt *F); |
| 203 | bool VisitNamedDecl(clang::NamedDecl *D); |
| 204 | bool VisitDeclRefExpr(clang::DeclRefExpr *D); |
| 205 | bool VisitTypeLoc(clang::TypeLoc TL); |
| 206 | }; |
| 207 | |
| 208 | /// The information needed to describe a valid convertible usage |
| 209 | /// of an array index or iterator. |
| 210 | struct Usage { |
| 211 | enum UsageKind { |
| 212 | // Regular usages of the loop index (the ones not specified below). Some |
| 213 | // examples: |
| 214 | // \code |
| 215 | // int X = 8 * Arr[i]; |
| 216 | // ^~~~~~ |
| 217 | // f(param1, param2, *It); |
| 218 | // ^~~ |
| 219 | // if (Vec[i].SomeBool) {} |
| 220 | // ^~~~~~ |
| 221 | // \endcode |
| 222 | UK_Default, |
| 223 | // Indicates whether this is an access to a member through the arrow |
| 224 | // operator on pointers or iterators. |
| 225 | UK_MemberThroughArrow, |
| 226 | // If the variable is being captured by a lambda, indicates whether the |
| 227 | // capture was done by value or by reference. |
| 228 | UK_CaptureByCopy, |
| 229 | UK_CaptureByRef |
| 230 | }; |
| 231 | // The expression that is going to be converted. Null in case of lambda |
| 232 | // captures. |
| 233 | const Expr *Expression; |
| 234 | |
| 235 | UsageKind Kind; |
| 236 | |
| 237 | // Range that covers this usage. |
| 238 | SourceRange Range; |
| 239 | |
| 240 | explicit Usage(const Expr *E) |
| 241 | : Expression(E), Kind(UK_Default), Range(Expression->getSourceRange()) {} |
| 242 | Usage(const Expr *E, UsageKind Kind, SourceRange Range) |
| 243 | : Expression(E), Kind(Kind), Range(std::move(Range)) {} |
| 244 | }; |
| 245 | |
| 246 | /// A class to encapsulate lowering of the tool's confidence level. |
| 247 | class Confidence { |
| 248 | public: |
| 249 | enum Level { |
| 250 | // Transformations that are likely to change semantics. |
| 251 | CL_Risky, |
| 252 | |
| 253 | // Transformations that might change semantics. |
| 254 | CL_Reasonable, |
| 255 | |
| 256 | // Transformations that will not change semantics. |
| 257 | CL_Safe |
| 258 | }; |
| 259 | /// Initialize confidence level. |
| 260 | explicit Confidence(Confidence::Level Level) : CurrentLevel(Level) {} |
| 261 | |
| 262 | /// Lower the internal confidence level to Level, but do not raise it. |
| 263 | void lowerTo(Confidence::Level Level) { |
| 264 | CurrentLevel = std::min(Level, CurrentLevel); |
| 265 | } |
| 266 | |
| 267 | /// Return the internal confidence level. |
| 268 | Level getLevel() const { return CurrentLevel; } |
| 269 | |
| 270 | private: |
| 271 | Level CurrentLevel; |
| 272 | }; |
| 273 | |
| 274 | // The main computational result of ForLoopIndexVisitor. |
| 275 | typedef llvm::SmallVector<Usage, 8> UsageResult; |
| 276 | |
| 277 | // General functions used by ForLoopIndexUseVisitor and LoopConvertCheck. |
| 278 | const Expr *digThroughConstructors(const Expr *E); |
| 279 | bool areSameExpr(ASTContext *Context, const Expr *First, const Expr *Second); |
| 280 | const DeclRefExpr *getDeclRef(const Expr *E); |
| 281 | bool areSameVariable(const ValueDecl *First, const ValueDecl *Second); |
| 282 | |
| 283 | /// Discover usages of expressions consisting of index or iterator |
| 284 | /// access. |
| 285 | /// |
| 286 | /// Given an index variable, recursively crawls a for loop to discover if the |
| 287 | /// index variable is used in a way consistent with range-based for loop access. |
| 288 | class ForLoopIndexUseVisitor |
| 289 | : public RecursiveASTVisitor<ForLoopIndexUseVisitor> { |
| 290 | public: |
| 291 | ForLoopIndexUseVisitor(ASTContext *Context, const VarDecl *IndexVar, |
| 292 | const VarDecl *EndVar, const Expr *ContainerExpr, |
| 293 | const Expr *ArrayBoundExpr, |
| 294 | bool ContainerNeedsDereference); |
| 295 | |
| 296 | /// Finds all uses of IndexVar in Body, placing all usages in Usages, |
| 297 | /// and returns true if IndexVar was only used in a way consistent with a |
| 298 | /// range-based for loop. |
| 299 | /// |
| 300 | /// The general strategy is to reject any DeclRefExprs referencing IndexVar, |
| 301 | /// with the exception of certain acceptable patterns. |
| 302 | /// For arrays, the DeclRefExpr for IndexVar must appear as the index of an |
| 303 | /// ArraySubscriptExpression. Iterator-based loops may dereference |
| 304 | /// IndexVar or call methods through operator-> (builtin or overloaded). |
| 305 | /// Array-like containers may use IndexVar as a parameter to the at() member |
| 306 | /// function and in overloaded operator[]. |
| 307 | bool findAndVerifyUsages(const Stmt *Body); |
| 308 | |
| 309 | /// Add a set of components that we should consider relevant to the |
| 310 | /// container. |
| 311 | void addComponents(const ComponentVector &Components); |
| 312 | |
| 313 | /// Accessor for Usages. |
| 314 | const UsageResult &getUsages() const { return Usages; } |
| 315 | |
| 316 | /// Adds the Usage if it was not added before. |
| 317 | void addUsage(const Usage &U); |
| 318 | |
| 319 | /// Get the container indexed by IndexVar, if any. |
| 320 | const Expr *getContainerIndexed() const { return ContainerExpr; } |
| 321 | |
| 322 | /// Returns the statement declaring the variable created as an alias |
| 323 | /// for the loop element, if any. |
| 324 | const DeclStmt *getAliasDecl() const { return AliasDecl; } |
| 325 | |
| 326 | /// Accessor for ConfidenceLevel. |
| 327 | Confidence::Level getConfidenceLevel() const { |
| 328 | return ConfidenceLevel.getLevel(); |
| 329 | } |
| 330 | |
| 331 | /// Indicates if the alias declaration was in a place where it cannot |
| 332 | /// simply be removed but rather replaced with a use of the alias variable. |
| 333 | /// For example, variables declared in the condition of an if, switch, or for |
| 334 | /// stmt. |
| 335 | bool aliasUseRequired() const { return ReplaceWithAliasUse; } |
| 336 | |
| 337 | /// Indicates if the alias declaration came from the init clause of a |
| 338 | /// nested for loop. SourceRanges provided by Clang for DeclStmts in this |
| 339 | /// case need to be adjusted. |
| 340 | bool aliasFromForInit() const { return AliasFromForInit; } |
| 341 | |
| 342 | private: |
| 343 | /// Typedef used in CRTP functions. |
| 344 | typedef RecursiveASTVisitor<ForLoopIndexUseVisitor> VisitorBase; |
| 345 | friend class RecursiveASTVisitor<ForLoopIndexUseVisitor>; |
| 346 | |
| 347 | /// Overriden methods for RecursiveASTVisitor's traversal. |
| 348 | bool TraverseArraySubscriptExpr(ArraySubscriptExpr *E); |
| 349 | bool TraverseCXXMemberCallExpr(CXXMemberCallExpr *MemberCall); |
| 350 | bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *OpCall); |
| 351 | bool TraverseLambdaCapture(LambdaExpr *LE, const LambdaCapture *C, |
| 352 | Expr *Init); |
| 353 | bool TraverseMemberExpr(MemberExpr *Member); |
| 354 | bool TraverseUnaryOperator(UnaryOperator *Uop); |
| 355 | bool VisitDeclRefExpr(DeclRefExpr *E); |
| 356 | bool VisitDeclStmt(DeclStmt *S); |
| 357 | bool TraverseStmt(Stmt *S); |
| 358 | |
| 359 | /// Add an expression to the list of expressions on which the container |
| 360 | /// expression depends. |
| 361 | void addComponent(const Expr *E); |
| 362 | |
| 363 | // Input member variables: |
| 364 | ASTContext *Context; |
| 365 | /// The index variable's VarDecl. |
| 366 | const VarDecl *IndexVar; |
| 367 | /// The loop's 'end' variable, which cannot be mentioned at all. |
| 368 | const VarDecl *EndVar; |
| 369 | /// The Expr which refers to the container. |
| 370 | const Expr *ContainerExpr; |
| 371 | /// The Expr which refers to the terminating condition for array-based loops. |
| 372 | const Expr *ArrayBoundExpr; |
| 373 | bool ContainerNeedsDereference; |
| 374 | |
| 375 | // Output member variables: |
| 376 | /// A container which holds all usages of IndexVar as the index of |
| 377 | /// ArraySubscriptExpressions. |
| 378 | UsageResult Usages; |
| 379 | llvm::SmallSet<SourceLocation, 8> UsageLocations; |
| 380 | bool OnlyUsedAsIndex; |
| 381 | /// The DeclStmt for an alias to the container element. |
| 382 | const DeclStmt *AliasDecl; |
| 383 | Confidence ConfidenceLevel; |
| 384 | /// A list of expressions on which ContainerExpr depends. |
| 385 | /// |
| 386 | /// If any of these expressions are encountered outside of an acceptable usage |
| 387 | /// of the loop element, lower our confidence level. |
| 388 | llvm::SmallVector<std::pair<const Expr *, llvm::FoldingSetNodeID>, 16> |
| 389 | DependentExprs; |
| 390 | |
| 391 | /// The parent-in-waiting. Will become the real parent once we traverse down |
| 392 | /// one level in the AST. |
| 393 | const Stmt *NextStmtParent; |
| 394 | /// The actual parent of a node when Visit*() calls are made. Only the |
| 395 | /// parentage of DeclStmt's to possible iteration/selection statements is of |
| 396 | /// importance. |
| 397 | const Stmt *CurrStmtParent; |
| 398 | |
| 399 | /// \see aliasUseRequired(). |
| 400 | bool ReplaceWithAliasUse; |
| 401 | /// \see aliasFromForInit(). |
| 402 | bool AliasFromForInit; |
| 403 | }; |
| 404 | |
| 405 | struct TUTrackingInfo { |
| 406 | /// Reset and initialize per-TU tracking information. |
| 407 | /// |
| 408 | /// Must be called before using container accessors. |
| 409 | TUTrackingInfo() : ParentFinder(new StmtAncestorASTVisitor) {} |
| 410 | |
| 411 | StmtAncestorASTVisitor &getParentFinder() { return *ParentFinder; } |
| 412 | StmtGeneratedVarNameMap &getGeneratedDecls() { return GeneratedDecls; } |
| 413 | ReplacedVarsMap &getReplacedVars() { return ReplacedVars; } |
| 414 | |
| 415 | private: |
| 416 | std::unique_ptr<StmtAncestorASTVisitor> ParentFinder; |
| 417 | StmtGeneratedVarNameMap GeneratedDecls; |
| 418 | ReplacedVarsMap ReplacedVars; |
| 419 | }; |
| 420 | |
| 421 | /// Create names for generated variables within a particular statement. |
| 422 | /// |
| 423 | /// VariableNamer uses a DeclContext as a reference point, checking for any |
| 424 | /// conflicting declarations higher up in the context or within SourceStmt. |
| 425 | /// It creates a variable name using hints from a source container and the old |
| 426 | /// index, if they exist. |
| 427 | class VariableNamer { |
| 428 | public: |
| 429 | // Supported naming styles. |
| 430 | enum NamingStyle { |
| 431 | NS_CamelBack, |
| 432 | NS_CamelCase, |
| 433 | NS_LowerCase, |
| 434 | NS_UpperCase, |
| 435 | }; |
| 436 | |
| 437 | VariableNamer(StmtGeneratedVarNameMap *GeneratedDecls, |
| 438 | const StmtParentMap *ReverseAST, const clang::Stmt *SourceStmt, |
| 439 | const clang::VarDecl *OldIndex, |
| 440 | const clang::ValueDecl *TheContainer, |
| 441 | const clang::ASTContext *Context, NamingStyle Style) |
| 442 | : GeneratedDecls(GeneratedDecls), ReverseAST(ReverseAST), |
| 443 | SourceStmt(SourceStmt), OldIndex(OldIndex), TheContainer(TheContainer), |
| 444 | Context(Context), Style(Style) {} |
| 445 | |
| 446 | /// Generate a new index name. |
| 447 | /// |
| 448 | /// Generates the name to be used for an inserted iterator. It relies on |
| 449 | /// declarationExists() to determine that there are no naming conflicts, and |
| 450 | /// tries to use some hints from the container name and the old index name. |
| 451 | std::string createIndexName(); |
| 452 | |
| 453 | private: |
| 454 | StmtGeneratedVarNameMap *GeneratedDecls; |
| 455 | const StmtParentMap *ReverseAST; |
| 456 | const clang::Stmt *SourceStmt; |
| 457 | const clang::VarDecl *OldIndex; |
| 458 | const clang::ValueDecl *TheContainer; |
| 459 | const clang::ASTContext *Context; |
| 460 | const NamingStyle Style; |
| 461 | |
| 462 | // Determine whether or not a declaration that would conflict with Symbol |
| 463 | // exists in an outer context or in any statement contained in SourceStmt. |
| 464 | bool declarationExists(llvm::StringRef Symbol); |
| 465 | }; |
| 466 | |
| 467 | } // namespace modernize |
| 468 | } // namespace tidy |
| 469 | } // namespace clang |
| 470 | |
| 471 | #endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOP_CONVERT_UTILS_H |