blob: ab92c5d6f67bbcbd729546f07605f16ec0159636 [file] [log] [blame]
Gilles Peskine8266b5b2021-09-27 19:53:31 +02001#!/usr/bin/env python3
2#
3# Copyright The Mbed TLS Contributors
Dave Rodgman7ff79652023-11-03 12:04:52 +00004# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
Gilles Peskine8266b5b2021-09-27 19:53:31 +02005
"""
This script confirms that the naming of all symbols and identifiers in Mbed TLS
is consistent with the house style and is also self-consistent. It only runs
on Linux and macOS since it depends on nm.

It contains two major Python classes, CodeParser and NameChecker. They both have
a comprehensive "run-all" function (comprehensive_parse() and perform_checks())
but the individual functions can also be used for specific needs.

CodeParser makes heavy use of regular expressions to parse the code, and is
dependent on the current code formatting. Many Python C parser libraries require
preprocessed C code, which means no macro parsing. Compiler tools are also not
very helpful when we want the exact location in the original source (which
becomes impossible when e.g. comments are stripped).

NameChecker performs the following checks:

- All exported and available symbols in the library object files are explicitly
  declared in the header files. This uses the nm command.
- All macros, constants, and identifiers (function names, struct names, etc)
  follow the required regex pattern.
- Typo checking: All words that begin with MBED|PSA exist as macros or constants.

The script returns 0 on success, 1 on test failure, and 2 if there is a script
error. It must be run from Mbed TLS root.
"""
32
33import abc
34import argparse
Gilles Peskine7bf52052021-09-27 19:20:17 +020035import fnmatch
Gilles Peskine8266b5b2021-09-27 19:53:31 +020036import glob
37import textwrap
38import os
39import sys
40import traceback
41import re
42import enum
43import shutil
44import subprocess
45import logging
46
Gilles Peskine7ff47662022-09-18 21:17:09 +020047import scripts_path # pylint: disable=unused-import
48from mbedtls_dev import build_tree
49
50
# Regular expressions that names must satisfy. They live at module level,
# outside the NameChecker class, so that they are easy to adjust.
#
# Macros and enum constants: an MBEDTLS_ or PSA_ prefix followed by upper-case
# letters, digits and underscores, not ending in an underscore.
MACRO_PATTERN = r"^(MBEDTLS|PSA)_[0-9A-Z_]*[0-9A-Z]$"
# Enum constants share the macro naming rule.
CONSTANTS_PATTERN = MACRO_PATTERN
# Identifiers: the lower-case counterpart of the macro rule.
IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$"
56
class Match(): # pylint: disable=too-few-public-methods
    """
    A class representing a match, together with its found position.

    Fields:
    * filename: the file that the match was in.
    * line: the full line containing the match.
    * line_no: the line number.
    * pos: a tuple of (start, end) positions on the line where the match is.
    * name: the match itself.
    """
    def __init__(self, filename, line, line_no, pos, name):
        # pylint: disable=too-many-arguments
        self.filename = filename
        self.line = line
        self.line_no = line_no
        self.pos = pos
        self.name = name

    def __str__(self):
        """
        Return a formatted code listing representation of the erroneous line.

        The listing has three rows: an empty gutter row, the source line
        prefixed with its line number, and a row of carets underlining the
        (start, end) span stored in ``pos``.
        """
        gutter = format(self.line_no, "4d")
        blank_gutter = " " * len(gutter)
        underline = self.pos[0] * " " + (self.pos[1] - self.pos[0]) * "^"

        # Lines read from a file keep their newline, except possibly the last
        # line of the file. Make sure the source row is always terminated so
        # that the underline row starts on a line of its own.
        source_line = self.line
        if not source_line.endswith("\n"):
            source_line += "\n"

        return (
            " {0} |\n".format(blank_gutter) +
            " {0} | {1}".format(gutter, source_line) +
            " {0} | {1}\n".format(blank_gutter, underline)
        )
88
class Problem(abc.ABC): # pylint: disable=too-few-public-methods
    """
    Abstract base for every kind of static analysis error reported here.

    Being an Abstract Base Class, it cannot be instantiated directly;
    subclasses must implement quiet_output() and verbose_output().
    """
    # Class variable shared by all problems: selects quiet or verbose output.
    quiet = False

    def __init__(self):
        # Wrapper used by subclasses to lay out their verbose explanation.
        self.textwrapper = textwrap.TextWrapper(
            width=80,
            initial_indent=" > ",
            subsequent_indent=" "
        )

    def __str__(self):
        """
        Unified string representation for all Problems: the quiet form when
        the class-level ``quiet`` flag is set, the verbose form otherwise.
        """
        render = self.quiet_output if self.__class__.quiet else self.verbose_output
        return render()

    @abc.abstractmethod
    def quiet_output(self):
        """
        The output when --quiet is enabled.
        """

    @abc.abstractmethod
    def verbose_output(self):
        """
        The default output with explanation and code snippet if appropriate.
        """
124
class SymbolNotInHeader(Problem): # pylint: disable=too-few-public-methods
    """
    Reported when an exported/available symbol in the object file is not
    explicitly declared in header files. Created with
    NameChecker.check_symbols_declared_in_header()

    Fields:
    * symbol_name: the name of the symbol.
    """
    def __init__(self, symbol_name):
        super().__init__()
        self.symbol_name = symbol_name

    def quiet_output(self):
        """Return just the symbol name."""
        return "{0}".format(self.symbol_name)

    def verbose_output(self):
        """Return a wrapped one-sentence explanation naming the symbol."""
        explanation = (
            "'{0}' was found as an available symbol in the output of nm, "
            "however it was not declared in any header files."
        ).format(self.symbol_name)
        return self.textwrapper.fill(explanation)
146
class PatternMismatch(Problem): # pylint: disable=too-few-public-methods
    """
    Reported when a name does not match the pattern expected of it.
    Created with NameChecker.check_match_pattern()

    Fields:
    * pattern: the expected regex pattern
    * match: the Match object in question
    """
    def __init__(self, pattern, match):
        super().__init__()
        self.pattern = pattern
        self.match = match

    def quiet_output(self):
        """Return ``filename:line_no:name`` for the offending match."""
        found = self.match
        return "{0}:{1}:{2}".format(found.filename, found.line_no, found.name)

    def verbose_output(self):
        """
        Return a wrapped explanation followed by the code listing of the
        offending match.
        """
        found = self.match
        explanation = self.textwrapper.fill(
            "{0}:{1}: '{2}' does not match the required pattern '{3}'."
            .format(found.filename, found.line_no, found.name, self.pattern)
        )
        return explanation + "\n" + str(found)
178
class Typo(Problem): # pylint: disable=too-few-public-methods
    """
    Reported when a word using MBED or PSA doesn't appear to be defined as
    a constant nor an enum value. Created with
    NameChecker.check_for_typos()

    Fields:
    * match: the Match object of the MBED|PSA name in question.
    """
    def __init__(self, match):
        super().__init__()
        self.match = match

    def quiet_output(self):
        """Return ``filename:line_no:name`` for the suspicious word."""
        found = self.match
        return "{0}:{1}:{2}".format(found.filename, found.line_no, found.name)

    def verbose_output(self):
        """
        Return a wrapped explanation followed by the code listing of the
        suspicious word.
        """
        found = self.match
        explanation = self.textwrapper.fill(
            "{0}:{1}: '{2}' looks like a typo. It was not found in any "
            "macros or any enums. If this is not a typo, put "
            "//no-check-names after it."
            .format(found.filename, found.line_no, found.name)
        )
        return explanation + "\n" + str(found)
205
class CodeParser():
    """
    Class for retrieving files and parsing the code. This can be used
    independently of the checks that NameChecker performs, for example for
    list_internal_identifiers.py.
    """
    def __init__(self, log):
        """
        Args:
        * log: the logger that all parsing steps report to.
        """
        self.log = log
        build_tree.check_repo_path()

        # Memo for storing "glob expression": set(filepaths)
        self.files = {}

        # Globally excluded filenames.
        # Note that "*" can match directory separators in exclude lists.
        self.excluded_files = ["*/bn_mul", "*/compat-1.3.h"]

    def comprehensive_parse(self):
        """
        Comprehensive ("default") function to call each parsing function and
        retrieve various elements of the code, together with the source location.

        Returns a dict of parsed item key to the corresponding List of Matches.
        """
        self.log.info("Parsing source code...")
        self.log.debug(
            "The following files are excluded from the search: {}"
            .format(str(self.excluded_files))
        )

        all_macros = self.parse_macros([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "tests/include/test/drivers/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        private_macros = self.parse_macros([
            "library/*.c",
        ])
        enum_consts = self.parse_enum_consts([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "library/*.c",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        identifiers, excluded_identifiers = self.parse_identifiers([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        mbed_psa_words = self.parse_mbed_psa_words([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h",
            "library/*.c",
            "3rdparty/everest/library/everest.c",
            "3rdparty/everest/library/x25519.c"
        ])
        symbols = self.parse_symbols()

        # Remove identifier macros like mbedtls_printf or mbedtls_calloc.
        # A set makes each membership test constant-time instead of a scan
        # over every identifier.
        identifier_names = {x.name for x in identifiers}
        actual_macros = [
            macro for macro in all_macros
            if macro.name not in identifier_names
        ]

        self.log.debug("Found:")
        # Aligns the counts on the assumption that none exceeds 4 digits
        self.log.debug(" {:4} Total Macros".format(len(all_macros)))
        self.log.debug(" {:4} Non-identifier Macros".format(len(actual_macros)))
        self.log.debug(" {:4} Enum Constants".format(len(enum_consts)))
        self.log.debug(" {:4} Identifiers".format(len(identifiers)))
        self.log.debug(" {:4} Exported Symbols".format(len(symbols)))
        return {
            "macros": actual_macros,
            "private_macros": private_macros,
            "enum_consts": enum_consts,
            "identifiers": identifiers,
            "excluded_identifiers": excluded_identifiers,
            "symbols": symbols,
            "mbed_psa_words": mbed_psa_words
        }

    def is_file_excluded(self, path, exclude_wildcards):
        """Whether the given file path is excluded."""
        # exclude_wildcards may be None. Also, consider the global exclusions.
        patterns = (exclude_wildcards or []) + self.excluded_files
        return any(fnmatch.fnmatch(path, pattern) for pattern in patterns)

    def get_all_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the included UNIX-style wildcards
        and filter them into included and excluded lists.
        While the check_names script is designed only for use on UNIX/macOS
        (due to nm), this function alone will work fine on Windows even with
        forward slashes in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns:
        * inc_files: A sorted List of relative filepaths for included files.
        * exc_files: A sorted List of relative filepaths for excluded files.
        """
        accumulator = set()
        # The exclude wildcards are globbed too, so that files reachable only
        # through them still end up in the excluded list.
        for wildcard in include_wildcards + (exclude_wildcards or []):
            accumulator.update(glob.iglob(wildcard))

        inc_files = []
        exc_files = []
        # Sort so that the result (and every report derived from it) does not
        # depend on set iteration order.
        for path in sorted(accumulator):
            if self.is_file_excluded(path, exclude_wildcards):
                exc_files.append(path)
            else:
                inc_files.append(path)
        return (inc_files, exc_files)

    def get_included_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the included UNIX-style wildcards.
        While the check_names script is designed only for use on UNIX/macOS
        (due to nm), this function alone will work fine on Windows even with
        forward slashes in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns a sorted List of relative filepaths.
        """
        accumulator = set()
        for include_wildcard in include_wildcards:
            accumulator.update(glob.iglob(include_wildcard))

        # Sorted for a reproducible order across runs.
        return [path for path in sorted(accumulator)
                if not self.is_file_excluded(path, exclude_wildcards)]

    def parse_macros(self, include, exclude=None):
        """
        Parse all macros defined by #define preprocessor directives.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the found macros. Line numbers
        in the Match objects start at 1.
        """
        macro_regex = re.compile(r"# *define +(?P<macro>\w+)")
        # Macro names starting with any of these prefixes are skipped.
        exclusions = (
            "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
        )

        files = self.get_included_files(include, exclude)
        self.log.debug("Looking for macros in {} files".format(len(files)))

        macros = []
        for header_file in files:
            with open(header_file, "r", encoding="utf-8") as header:
                # Count lines from 1, matching what editors and compilers show.
                for line_no, line in enumerate(header, 1):
                    for macro in macro_regex.finditer(line):
                        macro_name = macro.group("macro")
                        if macro_name.startswith(exclusions):
                            continue

                        macros.append(Match(
                            header_file,
                            line,
                            line_no,
                            macro.span("macro"),
                            macro_name))

        return macros

    def parse_mbed_psa_words(self, include, exclude=None):
        """
        Parse all words in the file that begin with MBED|PSA, in and out of
        macros, comments, anything.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for words beginning with MBED|PSA.
        Line numbers in the Match objects start at 1.
        """
        # Typos of TLS are common, hence the broader check below than MBEDTLS.
        mbed_regex = re.compile(r"\b(MBED.+?|PSA)_[A-Z0-9_]*")
        # Lines carrying the opt-out marker or an #error are not scanned.
        exclusions = re.compile(r"// *no-check-names|#error")

        files = self.get_included_files(include, exclude)
        self.log.debug(
            "Looking for MBED|PSA words in {} files"
            .format(len(files))
        )

        mbed_psa_words = []
        for filename in files:
            with open(filename, "r", encoding="utf-8") as fp:
                # Count lines from 1, matching what editors and compilers show.
                for line_no, line in enumerate(fp, 1):
                    if exclusions.search(line):
                        continue

                    for name in mbed_regex.finditer(line):
                        mbed_psa_words.append(Match(
                            filename,
                            line,
                            line_no,
                            name.span(0),
                            name.group(0)))

        return mbed_psa_words

    def parse_enum_consts(self, include, exclude=None):
        """
        Parse all enum value constants that are declared.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the findings. Line numbers in the
        Match objects start at 1.
        """
        files = self.get_included_files(include, exclude)
        self.log.debug("Looking for enum consts in {} files".format(len(files)))

        # Emulate a finite state machine to parse enum declarations.
        # OUTSIDE_KEYWORD = outside the enum keyword
        # IN_BRACES = inside enum opening braces
        # IN_BETWEEN = between enum keyword and opening braces
        states = enum.Enum("FSM", ["OUTSIDE_KEYWORD", "IN_BRACES", "IN_BETWEEN"])

        # Match typedefs and brackets only when they are at the
        # beginning of the line -- if they are indented, they might
        # be sub-structures within structs, etc.
        # The patterns do not depend on the line, so build them once.
        optional_c_identifier = r"([_a-zA-Z][_a-zA-Z0-9]*)?"
        enum_with_brace_regex = re.compile(
            r"^(typedef +)?enum " + optional_c_identifier + r" *{")
        enum_keyword_regex = re.compile(r"^(typedef +)?enum")
        opening_brace_regex = re.compile(r"^{")
        closing_brace_regex = re.compile(r"^}")
        preprocessor_regex = re.compile(r"^ *#")
        enum_const_regex = re.compile(r"^ *(?P<enum_const>\w+)")

        enum_consts = []
        for header_file in files:
            state = states.OUTSIDE_KEYWORD
            with open(header_file, "r", encoding="utf-8") as header:
                # Count lines from 1, matching what editors and compilers show.
                for line_no, line in enumerate(header, 1):
                    if (state == states.OUTSIDE_KEYWORD and
                            enum_with_brace_regex.search(line)):
                        state = states.IN_BRACES
                    elif (state == states.OUTSIDE_KEYWORD and
                          enum_keyword_regex.search(line)):
                        state = states.IN_BETWEEN
                    elif (state == states.IN_BETWEEN and
                          opening_brace_regex.search(line)):
                        state = states.IN_BRACES
                    elif (state == states.IN_BRACES and
                          closing_brace_regex.search(line)):
                        state = states.OUTSIDE_KEYWORD
                    elif (state == states.IN_BRACES and
                          not preprocessor_regex.search(line)):
                        enum_const = enum_const_regex.search(line)
                        if not enum_const:
                            continue

                        enum_consts.append(Match(
                            header_file,
                            line,
                            line_no,
                            enum_const.span("enum_const"),
                            enum_const.group("enum_const")))

        return enum_consts

    IGNORED_CHUNK_REGEX = re.compile('|'.join([
        r'/\*.*?\*/', # block comment entirely on one line
        r'//.*', # line comment
        r'(?P<string>")(?:[^\\\"]|\\.)*"', # string literal
    ]))

    def strip_comments_and_literals(self, line, in_block_comment):
        """Strip comments and string literals from line.

        Continuation lines are not supported.

        If in_block_comment is true, assume that the line starts inside a
        block comment.

        Return updated values of (line, in_block_comment) where:
        * Comments in line have been replaced by a space (or nothing at the
          start or end of the line).
        * String contents have been removed.
        * in_block_comment indicates whether the line ends inside a block
          comment that continues on the next line.
        """

        # Terminate current multiline comment?
        if in_block_comment:
            m = re.search(r"\*/", line)
            if m:
                in_block_comment = False
                line = line[m.end(0):]
            else:
                return '', True

        # Remove full comments and string literals.
        # Do it all together to handle cases like "/*" correctly.
        # Note that continuation lines are not supported.
        line = self.IGNORED_CHUNK_REGEX.sub(
            lambda s: '""' if s.group('string') else ' ',
            line)

        # Start an unfinished comment?
        # (If `/*` was part of a complete comment, it's already been removed.)
        m = re.search(r"/\*", line)
        if m:
            in_block_comment = True
            line = line[:m.start(0)]

        return line, in_block_comment

    IDENTIFIER_REGEX = re.compile('|'.join([
        # Match " something(a" or " *something(a". Functions.
        # Assumptions:
        # - function definition from return type to one of its arguments is
        #   all on one line
        # - function definition line only contains alphanumeric, asterisk,
        #   underscore, and open bracket
        r".* \**(\w+) *\( *\w",
        # Match "(*something)(".
        r".*\( *\* *(\w+) *\) *\(",
        # Match names of named data structures.
        r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$",
        # Match names of typedef instances, after closing bracket.
        r"}? *(\w+)[;[].*",
    ]))
    # The regex below is indented for clarity.
    EXCLUSION_LINES = re.compile("|".join([
        r"extern +\"C\"",
        r"(typedef +)?(struct|union|enum)( *{)?$",
        r"} *;?$",
        r"$",
        r"//",
        r"#",
    ]))

    def parse_identifiers_in_file(self, header_file, identifiers):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style.

        Append found matches to the list ``identifiers``. Line numbers in the
        Match objects start at 1; for a declaration spread over several
        lines, it is the number of the line that completes it.
        """
        # A line made only of words, asterisks and open parentheses.
        continuation_regex = re.compile(r"^([\w\*\(]+\s+)+$")

        with open(header_file, "r", encoding="utf-8") as header:
            in_block_comment = False
            # The previous line variable is used for concatenating lines
            # when identifiers are formatted and spread across multiple
            # lines.
            previous_line = ""

            # Count lines from 1, matching what editors and compilers show.
            for line_no, line in enumerate(header, 1):
                line, in_block_comment = \
                    self.strip_comments_and_literals(line, in_block_comment)

                # This also skips empty lines, so line[0] below is safe.
                if self.EXCLUSION_LINES.match(line):
                    previous_line = ""
                    continue

                # If the line contains only space-separated alphanumeric
                # characters (or underscore, asterisk, or open parenthesis),
                # and nothing else, high chance it's a declaration that
                # continues on the next line
                if continuation_regex.search(line):
                    previous_line += line
                    continue

                # If previous line seemed to start an unfinished declaration
                # (as above), concat and treat them as one.
                if previous_line:
                    line = previous_line.strip() + " " + line.strip() + "\n"
                    previous_line = ""

                # Skip parsing if line has a space in front = heuristic to
                # skip function argument lines (highly subject to formatting
                # changes)
                if line[0] == " ":
                    continue

                identifier = self.IDENTIFIER_REGEX.search(line)

                if not identifier:
                    continue

                # Find the group that matched, and append it
                for group in identifier.groups():
                    if not group:
                        continue

                    identifiers.append(Match(
                        header_file,
                        line,
                        line_no,
                        identifier.span(),
                        group))

    def parse_identifiers(self, include, exclude=None):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style. Identifiers in excluded files are still
        parsed.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns: a Tuple of two Lists of Match objects with identifiers.
        * included_identifiers: A List of Match objects with identifiers from
          included files.
        * excluded_identifiers: A List of Match objects with identifiers from
          excluded files.
        """

        included_files, excluded_files = \
            self.get_all_files(include, exclude)

        self.log.debug("Looking for included identifiers in {} files".format \
            (len(included_files)))

        included_identifiers = []
        excluded_identifiers = []
        for header_file in included_files:
            self.parse_identifiers_in_file(header_file, included_identifiers)
        for header_file in excluded_files:
            self.parse_identifiers_in_file(header_file, excluded_identifiers)

        return (included_identifiers, excluded_identifiers)

    def parse_symbols(self):
        """
        Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509
        object files using nm to retrieve the list of referenced symbols.
        Exceptions thrown here are rethrown because they would be critical
        errors that void several tests, and thus needs to halt the program. This
        is explicitly done for clarity.

        Returns a List of symbols defined and used in the libraries.
        """
        self.log.info("Compiling...")
        symbols = []

        # Back up the config and atomically compile with the full configuration.
        shutil.copy(
            "include/mbedtls/config.h",
            "include/mbedtls/config.h.bak"
        )
        try:
            # Use check=True in all subprocess calls so that failures are raised
            # as exceptions and logged.
            subprocess.run(
                ["python3", "scripts/config.py", "full"],
                universal_newlines=True,
                check=True
            )
            my_environment = os.environ.copy()
            my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"
            # Run make clean separately to lib to prevent unwanted behavior when
            # make is invoked with parallelism.
            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
            subprocess.run(
                ["make", "lib"],
                env=my_environment,
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            )

            # Perform object file analysis using nm
            symbols = self.parse_symbols_from_nm([
                "library/libmbedcrypto.a",
                "library/libmbedtls.a",
                "library/libmbedx509.a"
            ])

            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
        except subprocess.CalledProcessError as error:
            # Only calls that captured stdout have output; otherwise this
            # logs None.
            self.log.debug(error.output)
            raise
        finally:
            # Put back the original config regardless of there being errors.
            # Works also for keyboard interrupts.
            shutil.move(
                "include/mbedtls/config.h.bak",
                "include/mbedtls/config.h"
            )

        return symbols

    def parse_symbols_from_nm(self, object_files):
        """
        Run nm to retrieve the list of referenced symbols in each object file.
        Does not return the position data since it is of no use.

        Args:
        * object_files: a List of compiled object filepaths to search through.

        Returns a List of symbols defined and used in any of the object
        files, in the order nm printed them.
        """
        # Lines to ignore: undefined symbols, empty lines, bare "file:" headers.
        nm_undefined_regex = re.compile(r"^\S+: +U |^$|^\S+:$")
        nm_valid_regex = re.compile(r"^\S+( [0-9A-Fa-f]+)* . _*(?P<symbol>\w+)")
        # Symbols starting with these prefixes are not reported.
        exclusions = ("FStar", "Hacl")

        # Gather all outputs of nm
        nm_output = "".join(
            subprocess.run(
                ["nm", "-og", lib],
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            ).stdout
            for lib in object_files
        )

        symbols = []
        for line in nm_output.splitlines():
            if nm_undefined_regex.search(line):
                continue

            symbol = nm_valid_regex.search(line)
            if symbol and not symbol.group("symbol").startswith(exclusions):
                symbols.append(symbol.group("symbol"))
            else:
                # Either an unparseable line or an excluded symbol.
                self.log.error(line)

        return symbols
759
class NameChecker():
    """
    Representation of the core name checking operation performed by this script.

    Attributes:
    * parse_result: dictionary of parse results indexed by group name. The
      checks read the "symbols", "identifiers", "excluded_identifiers",
      "macros", "private_macros", "enum_consts" and "mbed_psa_words" groups.
      "symbols" holds plain names; the items of the other groups are read
      through their .name attribute.
    * log: logger used to report the outcome of each check.
    """
    def __init__(self, parse_result, log):
        self.parse_result = parse_result
        self.log = log

    def perform_checks(self, quiet=False):
        """
        A comprehensive checker that performs each check in order, and outputs
        a final verdict.

        Args:
        * quiet: whether to hide detailed problem explanation.

        Returns 0 if no problem was found, 1 otherwise.
        """
        self.log.info("=============")
        # Class-level switch: set it before any Problem gets reported.
        Problem.quiet = quiet
        problems = self.check_symbols_declared_in_header()

        pattern_checks = [
            ("macros", MACRO_PATTERN),
            ("enum_consts", CONSTANTS_PATTERN),
            ("identifiers", IDENTIFIER_PATTERN)
        ]
        for group, check_pattern in pattern_checks:
            problems += self.check_match_pattern(group, check_pattern)

        problems += self.check_for_typos()

        self.log.info("=============")
        if problems == 0:
            self.log.info("PASS")
            return 0

        self.log.info("FAIL: {0} problem(s) to fix".format(problems))
        if quiet:
            self.log.info("Remove --quiet to see explanations.")
        else:
            self.log.info("Use --quiet for minimal output.")
        return 1

    def check_symbols_declared_in_header(self):
        """
        Perform a check that all detected symbols in the library object files
        are properly declared in headers.
        Assumes parse_names_in_source() was called before this.

        Returns the number of problems that need fixing.
        """
        # Build the set of declared names once so that each symbol costs a
        # single hash lookup instead of a scan over every identifier.
        declared_names = {
            identifier_match.name
            for identifier_match
            in self.parse_result["identifiers"] +
            self.parse_result["excluded_identifiers"]
        }

        problems = [
            SymbolNotInHeader(symbol)
            for symbol in self.parse_result["symbols"]
            if symbol not in declared_names
        ]

        self.output_check_result("All symbols in header", problems)
        return len(problems)

    def check_match_pattern(self, group_to_check, check_pattern):
        """
        Perform a check that all items of a group conform to a regex pattern.
        Assumes parse_names_in_source() was called before this.

        Args:
        * group_to_check: string key to index into self.parse_result.
        * check_pattern: the regex to check against.

        Returns the number of problems that need fixing. An item that both
        fails the pattern and contains a double underscore counts twice.
        """
        problems = []
        # Compile once; the original pattern is still what gets reported.
        matcher = re.compile(check_pattern)

        for item_match in self.parse_result[group_to_check]:
            if not matcher.search(item_match.name):
                problems.append(PatternMismatch(check_pattern, item_match))
            # Double underscore should not be used for names
            if "__" in item_match.name:
                problems.append(
                    PatternMismatch("no double underscore allowed", item_match))

        self.output_check_result(
            "Naming patterns of {}".format(group_to_check),
            problems)
        return len(problems)

    def check_for_typos(self):
        """
        Perform a check that all words collected in the "mbed_psa_words" group
        are either defined as macros (public or private), or as enum constants.
        Assumes parse_names_in_source() was called before this.

        Returns the number of problems that need fixing.
        """
        problems = []

        # Set comprehension, equivalent to a list comprehension wrapped by set()
        all_caps_names = {
            match.name
            for match
            in self.parse_result["macros"] +
            self.parse_result["private_macros"] +
            self.parse_result["enum_consts"]
        }
        # Words matching any of these alternatives are never reported.
        # The pattern is used with search(), so an unanchored alternative
        # matches anywhere in the word: no trailing wildcard is needed (a
        # trailing "*" would only make the preceding letter optional).
        typo_exclusion = re.compile(r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$|"
                                    r"MBEDTLS_TEST_LIBTESTDRIVER|"
                                    r"PSA_CRYPTO_DRIVER_TEST")

        for name_match in self.parse_result["mbed_psa_words"]:
            found = name_match.name in all_caps_names

            # Since MBEDTLS_PSA_ACCEL_XXX defines are defined by the
            # PSA driver, they will not exist as macros. However, they
            # should still be checked for typos using the equivalent
            # BUILTINs that exist.
            if "MBEDTLS_PSA_ACCEL_" in name_match.name:
                found = name_match.name.replace(
                    "MBEDTLS_PSA_ACCEL_",
                    "MBEDTLS_PSA_BUILTIN_") in all_caps_names

            if not found and not typo_exclusion.search(name_match.name):
                problems.append(Typo(name_match))

        self.output_check_result("Likely typos", problems)
        return len(problems)

    def output_check_result(self, name, problems):
        """
        Write out the PASS/FAIL status of a performed check depending on whether
        there were problems.

        Args:
        * name: the name of the test
        * problems: a List of encountered Problems
        """
        if problems:
            self.log.info("{}: FAIL\n".format(name))
            for problem in problems:
                self.log.warning(str(problem))
        else:
            self.log.info("{}: PASS".format(name))
909
def main():
    """
    Script entry point: parse the command line, set up logging, run the
    CodeParser and hand its result over to the NameChecker.

    Always terminates through sys.exit(): with status 2 if parsing raises an
    exception, otherwise with the status returned by
    NameChecker.perform_checks().
    """
    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=(
            "This script confirms that the naming of all symbols and identifiers "
            "in Mbed TLS are consistent with the house style and are also "
            "self-consistent.\n\n"
            "Expected to be run from the Mbed TLS root directory.")
    )
    arg_parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="show parse results"
    )
    arg_parser.add_argument(
        "-q", "--quiet",
        action="store_true",
        help="hide unnecessary text, explanations, and highlights"
    )
    options = arg_parser.parse_args()

    # The root logger is configured here and shared with both classes.
    root_logger = logging.getLogger()
    if options.verbose:
        root_logger.setLevel(logging.DEBUG)
    else:
        root_logger.setLevel(logging.INFO)
    root_logger.addHandler(logging.StreamHandler())

    # Any failure while parsing is a script error (status 2), as opposed to
    # a failed check.
    try:
        parse_result = CodeParser(root_logger).comprehensive_parse()
    except Exception: # pylint: disable=broad-except
        traceback.print_exc()
        sys.exit(2)

    checker = NameChecker(parse_result, root_logger)
    sys.exit(checker.perform_checks(quiet=options.quiet))
952
# Run the checks only when this file is executed as a script; main() ends the
# process through sys.exit() with the script's status code.
if __name__ == "__main__":
    main()