Lift some code out of parse_identifiers
Make parse_identifiers less complex. Pylint was complaining that it had too
many local variables, and it had a point.
* Lift the constants identifier_regex and exclusion_lines to class
constants (renamed to uppercase because they're constants).
* Lift the per-file loop into a new function parse_identifiers_in_file.
No intended behavior change.
Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
diff --git a/tests/scripts/check_names.py b/tests/scripts/check_names.py
index 94f4cb1..1fb6487 100755
--- a/tests/scripts/check_names.py
+++ b/tests/scripts/check_names.py
@@ -457,6 +457,105 @@
return enum_consts
+ IDENTIFIER_REGEX = re.compile(
+ # Match " something(a" or " *something(a". Functions.
+ # Assumptions:
+ # - function definition from return type to one of its arguments is
+ # all on one line
+ # - function definition line only contains alphanumeric, asterisk,
+ # underscore, and open bracket
+ r".* \**(\w+) *\( *\w|"
+ # Match "(*something)(".
+ r".*\( *\* *(\w+) *\) *\(|"
+ # Match names of named data structures.
+ r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$|"
+ # Match names of typedef instances, after closing bracket.
+ r"}? *(\w+)[;[].*"
+ )
+ # The regex below is indented for clarity.
+ EXCLUSION_LINES = re.compile(
+ r"^("
+ r"extern +\"C\"|" # pylint: disable=bad-continuation
+ r"(typedef +)?(struct|union|enum)( *{)?$|"
+ r"} *;?$|"
+ r"$|"
+ r"//|"
+ r"#"
+ r")"
+ )
+
+ def parse_identifiers_in_file(self, header_file, identifiers):
+ """
+ Parse all lines of a header where a function/enum/struct/union/typedef
+ identifier is declared, based on some regex and heuristics. Highly
+ dependent on formatting style.
+
+ Append found matches to the list ``identifiers``.
+ """
+
+ with open(header_file, "r", encoding="utf-8") as header:
+ in_block_comment = False
+ # The previous line variable is used for concatenating lines
+ # when identifiers are formatted and spread across multiple
+ # lines.
+ previous_line = ""
+
+ for line_no, line in enumerate(header):
+ # Terminate current comment?
+ if in_block_comment:
+ line = re.sub(r".*?\*/", r"", line, 1)
+ in_block_comment = False
+ # Remove full comments and string literals
+ line = re.sub(r'/\*.*?\*/|(")(?:[^\\\"]|\\.)*"',
+ lambda s: '""' if s.group(1) else ' ',
+ line)
+ # Start an unfinished comment?
+ m = re.match(r"/\*", line)
+ if m:
+ in_block_comment = True
+ line = line[:m.end(0)]
+
+ if self.EXCLUSION_LINES.search(line):
+ previous_line = ""
+ continue
+
+ # If the line contains only space-separated alphanumeric
+ # characters (or underscore, asterisk, or, open bracket),
+ # and nothing else, high chance it's a declaration that
+ # continues on the next line
+ if re.search(r"^([\w\*\(]+\s+)+$", line):
+ previous_line += line
+ continue
+
+ # If previous line seemed to start an unfinished declaration
+ # (as above), concat and treat them as one.
+ if previous_line:
+ line = previous_line.strip() + " " + line.strip() + "\n"
+ previous_line = ""
+
+ # Skip parsing if line has a space in front = heuristic to
+ # skip function argument lines (highly subject to formatting
+ # changes)
+ if line[0] == " ":
+ continue
+
+ identifier = self.IDENTIFIER_REGEX.search(line)
+
+ if not identifier:
+ continue
+
+ # Find the group that matched, and append it
+ for group in identifier.groups():
+ if not group:
+ continue
+
+ identifiers.append(Match(
+ header_file,
+ line,
+ line_no,
+ identifier.span(),
+ group))
+
def parse_identifiers(self, include, exclude=None):
"""
Parse all lines of a header where a function/enum/struct/union/typedef
@@ -469,100 +568,13 @@
Returns a List of Match objects with identifiers.
"""
- identifier_regex = re.compile(
- # Match " something(a" or " *something(a". Functions.
- # Assumptions:
- # - function definition from return type to one of its arguments is
- # all on one line
- # - function definition line only contains alphanumeric, asterisk,
- # underscore, and open bracket
- r".* \**(\w+) *\( *\w|"
- # Match "(*something)(".
- r".*\( *\* *(\w+) *\) *\(|"
- # Match names of named data structures.
- r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$|"
- # Match names of typedef instances, after closing bracket.
- r"}? *(\w+)[;[].*"
- )
- # The regex below is indented for clarity.
- exclusion_lines = re.compile(
- r"^("
- r"extern +\"C\"|" # pylint: disable=bad-continuation
- r"(typedef +)?(struct|union|enum)( *{)?$|"
- r"} *;?$|"
- r"$|"
- r"//|"
- r"#"
- r")"
- )
files = self.get_files(include, exclude)
self.log.debug("Looking for identifiers in {} files".format(len(files)))
identifiers = []
for header_file in files:
- with open(header_file, "r", encoding="utf-8") as header:
- in_block_comment = False
- # The previous line variable is used for concatenating lines
- # when identifiers are formatted and spread across multiple
- # lines.
- previous_line = ""
-
- for line_no, line in enumerate(header):
- # Terminate current comment?
- if in_block_comment:
- line = re.sub(r".*?\*/", r"", line, 1)
- in_block_comment = False
- # Remove full comments and string literals
- line = re.sub(r'/\*.*?\*/|(")(?:[^\\\"]|\\.)*"',
- lambda s: '""' if s.group(1) else ' ',
- line)
- # Start an unfinished comment?
- m = re.match(r"/\*", line)
- if m:
- in_block_comment = True
- line = line[:m.end(0)]
-
- if exclusion_lines.search(line):
- previous_line = ""
- continue
-
- # If the line contains only space-separated alphanumeric
- # characters (or underscore, asterisk, or, open bracket),
- # and nothing else, high chance it's a declaration that
- # continues on the next line
- if re.search(r"^([\w\*\(]+\s+)+$", line):
- previous_line += line
- continue
-
- # If previous line seemed to start an unfinished declaration
- # (as above), concat and treat them as one.
- if previous_line:
- line = previous_line.strip() + " " + line.strip() + "\n"
- previous_line = ""
-
- # Skip parsing if line has a space in front = heuristic to
- # skip function argument lines (highly subject to formatting
- # changes)
- if line[0] == " ":
- continue
-
- identifier = identifier_regex.search(line)
-
- if not identifier:
- continue
-
- # Find the group that matched, and append it
- for group in identifier.groups():
- if not group:
- continue
-
- identifiers.append(Match(
- header_file,
- line,
- line_no,
- identifier.span(),
- group))
+ self.parse_identifiers_in_file(header_file, identifiers)
return identifiers