#!/usr/bin/env python3
#
# Copyright The Mbed TLS Contributors
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
17
"""
This script confirms that the naming of all symbols and identifiers in Mbed TLS
are consistent with the house style and are also self-consistent. It only runs
on Linux and macOS since it depends on nm.

It contains two major Python classes, CodeParser and NameChecker. They both have
a comprehensive "run-all" function (comprehensive_parse() and perform_checks())
but the individual functions can also be used for specific needs.

CodeParser makes heavy use of regular expressions to parse the code, and is
dependent on the current code formatting. Many Python C parser libraries require
preprocessed C code, which means no macro parsing. Compiler tools are also not
very helpful when we want the exact location in the original source (which
becomes impossible when e.g. comments are stripped).

NameChecker performs the following checks:

- All exported and available symbols in the library object files, are explicitly
  declared in the header files. This uses the nm command.
- All macros, constants, and identifiers (function names, struct names, etc)
  follow the required regex pattern.
- Typo checking: All words that begin with MBED|PSA exist as macros or constants.

The script returns 0 on success, 1 on test failure, and 2 if there is a script
error. It must be run from Mbed TLS root.
"""
44
import abc
import argparse
import fnmatch
import glob
import textwrap
import os
import sys
import traceback
import re
import enum
import shutil
import subprocess
import logging

import scripts_path # pylint: disable=unused-import
from mbedtls_dev import build_tree
61
62
# Naming patterns to check against. These are defined outside the NameChecker
# class for ease of modification.
# Macros and enum constants: MBEDTLS_/PSA_ prefix, then upper-case letters,
# digits and underscores, not ending in an underscore.
MACRO_PATTERN = r"^(MBEDTLS|PSA)_[0-9A-Z_]*[0-9A-Z]$"
CONSTANTS_PATTERN = MACRO_PATTERN
# Identifiers: mbedtls_/psa_ prefix, then lower-case letters, digits and
# underscores, not ending in an underscore.
IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$"
68
class Match(): # pylint: disable=too-few-public-methods
    """
    A class representing a match, together with its found position.

    Fields:
    * filename: the file that the match was in.
    * line: the full line containing the match.
    * line_no: the line number.
    * pos: a tuple of (start, end) positions on the line where the match is.
    * name: the match itself.
    """
    def __init__(self, filename, line, line_no, pos, name):
        # pylint: disable=too-many-arguments
        self.filename = filename
        self.line = line
        self.line_no = line_no
        self.pos = pos
        self.name = name

    def __str__(self):
        """
        Return a formatted code listing representation of the erroneous line.

        The listing has three rows: an empty gutter row, the source line
        prefixed by its line number, and a row of carets under the span
        given by ``pos``.
        """
        gutter = format(self.line_no, "4d")
        blank_gutter = " " * len(gutter)
        start, end = self.pos[0], self.pos[1]
        underline = start * " " + (end - start) * "^"

        # A line read from the very end of a file may lack its newline;
        # add one so the underline row always starts on its own line.
        source_line = self.line
        if not source_line.endswith("\n"):
            source_line += "\n"

        return (
            " {0} |\n".format(blank_gutter) +
            " {0} | {1}".format(gutter, source_line) +
            " {0} | {1}\n".format(blank_gutter, underline)
        )
100
class Problem(abc.ABC): # pylint: disable=too-few-public-methods
    """
    An abstract parent class representing a form of static analysis error.
    It extends an Abstract Base Class, which means it is not instantiable, and
    it also mandates certain abstract methods to be implemented in subclasses.
    """
    # Class variable to control the quietness of all problems
    quiet = False

    def __init__(self):
        # Wrapper shared by the verbose messages of every subclass.
        # NOTE(review): the indent strings are copied from the extracted
        # text, where runs of spaces may have been collapsed -- confirm the
        # intended widths against the repository.
        self.textwrapper = textwrap.TextWrapper(
            width=80,
            initial_indent=" > ",
            subsequent_indent=" "
        )

    def __str__(self):
        """
        Unified string representation method for all Problems.

        Returns quiet_output() when the class-level ``quiet`` flag is set,
        verbose_output() otherwise.
        """
        if self.__class__.quiet:
            return self.quiet_output()
        return self.verbose_output()

    @abc.abstractmethod
    def quiet_output(self):
        """
        The output when --quiet is enabled.
        """

    @abc.abstractmethod
    def verbose_output(self):
        """
        The default output with explanation and code snippet if appropriate.
        """
136
class SymbolNotInHeader(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when an exported/available symbol in the object file
    is not explicitly declared in header files. Created with
    NameChecker.check_symbols_declared_in_header()

    Fields:
    * symbol_name: the name of the symbol.
    """
    def __init__(self, symbol_name):
        super().__init__()
        self.symbol_name = symbol_name

    def quiet_output(self):
        """Return just the symbol name."""
        return "{0}".format(self.symbol_name)

    def verbose_output(self):
        """Return a wrapped sentence explaining the missing declaration."""
        message = (
            "'{0}' was found as an available symbol in the output of nm, "
            "however it was not declared in any header files."
            .format(self.symbol_name)
        )
        return self.textwrapper.fill(message)
158
class PatternMismatch(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when something doesn't match the expected pattern.
    Created with NameChecker.check_match_pattern()

    Fields:
    * pattern: the expected regex pattern
    * match: the Match object in question
    """
    def __init__(self, pattern, match):
        super().__init__()
        self.pattern = pattern
        self.match = match

    def quiet_output(self):
        """Return "filename:line_no:name" for the offending match."""
        return (
            "{0}:{1}:{2}"
            .format(self.match.filename, self.match.line_no, self.match.name)
        )

    def verbose_output(self):
        """
        Return a wrapped explanation naming the required pattern, followed by
        the string form of the match (its code listing).
        """
        explanation = self.textwrapper.fill(
            "{0}:{1}: '{2}' does not match the required pattern '{3}'."
            .format(
                self.match.filename,
                self.match.line_no,
                self.match.name,
                self.pattern
            )
        )
        return explanation + "\n" + str(self.match)
190
class Typo(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when a word using MBED or PSA doesn't
    appear to be defined as constants nor enum values. Created with
    NameChecker.check_for_typos()

    Fields:
    * match: the Match object of the MBED|PSA name in question.
    """
    def __init__(self, match):
        super().__init__()
        self.match = match

    def quiet_output(self):
        """Return "filename:line_no:name" for the suspicious word."""
        return (
            "{0}:{1}:{2}"
            .format(self.match.filename, self.match.line_no, self.match.name)
        )

    def verbose_output(self):
        """
        Return a wrapped explanation, including how to silence the check,
        followed by the string form of the match (its code listing).
        """
        explanation = self.textwrapper.fill(
            "{0}:{1}: '{2}' looks like a typo. It was not found in any "
            "macros or any enums. If this is not a typo, put "
            "//no-check-names after it."
            .format(self.match.filename, self.match.line_no, self.match.name)
        )
        return explanation + "\n" + str(self.match)
217
class CodeParser():
    """
    Class for retrieving files and parsing the code. This can be used
    independently of the checks that NameChecker performs, for example for
    list_internal_identifiers.py.

    Every Match produced by the parse_* methods carries a 1-based line number.
    """
    def __init__(self, log):
        """
        Args:
        * log: the logger used for progress and debug messages.
        """
        self.log = log
        build_tree.check_repo_path()

        # Memo for storing "glob expression": set(filepaths)
        self.files = {}

        # Globally excluded filenames.
        # Note that "*" can match directory separators in exclude lists.
        self.excluded_files = ["*/bn_mul", "*/compat-1.3.h"]

    def comprehensive_parse(self):
        """
        Comprehensive ("default") function to call each parsing function and
        retrieve various elements of the code, together with the source location.

        Returns a dict of parsed item key to the corresponding List of Matches.
        The keys are "macros", "private_macros", "enum_consts", "identifiers",
        "excluded_identifiers", "symbols" and "mbed_psa_words"; "symbols" maps
        to a List of plain symbol names rather than Matches.
        """
        self.log.info("Parsing source code...")
        self.log.debug(
            "The following files are excluded from the search: {}"
            .format(str(self.excluded_files))
        )

        all_macros = self.parse_macros([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "tests/include/test/drivers/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        private_macros = self.parse_macros([
            "library/*.c",
        ])
        enum_consts = self.parse_enum_consts([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "library/*.c",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        identifiers, excluded_identifiers = self.parse_identifiers([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        mbed_psa_words = self.parse_mbed_psa_words([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h",
            "library/*.c",
            "3rdparty/everest/library/everest.c",
            "3rdparty/everest/library/x25519.c"
        ])
        symbols = self.parse_symbols()

        # Remove identifier macros like mbedtls_printf or mbedtls_calloc.
        # A set keeps the membership test constant-time per macro.
        identifier_names = {x.name for x in identifiers}
        actual_macros = [
            macro for macro in all_macros
            if macro.name not in identifier_names
        ]

        self.log.debug("Found:")
        # Aligns the counts on the assumption that none exceeds 4 digits
        # NOTE(review): leading whitespace in these messages is copied from
        # the extracted text and may have been collapsed -- confirm.
        self.log.debug(" {:4} Total Macros".format(len(all_macros)))
        self.log.debug(" {:4} Non-identifier Macros".format(len(actual_macros)))
        self.log.debug(" {:4} Enum Constants".format(len(enum_consts)))
        self.log.debug(" {:4} Identifiers".format(len(identifiers)))
        self.log.debug(" {:4} Exported Symbols".format(len(symbols)))
        return {
            "macros": actual_macros,
            "private_macros": private_macros,
            "enum_consts": enum_consts,
            "identifiers": identifiers,
            "excluded_identifiers": excluded_identifiers,
            "symbols": symbols,
            "mbed_psa_words": mbed_psa_words
        }

    def is_file_excluded(self, path, exclude_wildcards):
        """Whether the given file path is excluded."""
        # exclude_wildcards may be None. Also, consider the global exclusions.
        patterns = (exclude_wildcards or []) + self.excluded_files
        return any(fnmatch.fnmatch(path, pattern) for pattern in patterns)

    def get_all_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the included UNIX-style wildcards
        and filter them into included and excluded lists.
        While the check_names script is designed only for use on UNIX/macOS
        (due to nm), this function alone will work fine on Windows even with
        forward slashes in the wildcard.

        Unlike get_included_files(), the exclude wildcards are globbed too, so
        files matched only by an exclude wildcard end up in the excluded list.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns:
        * inc_files: A List of relative filepaths for included files.
        * exc_files: A List of relative filepaths for excluded files.
        """
        accumulator = set()
        all_wildcards = include_wildcards + (exclude_wildcards or [])
        for wildcard in all_wildcards:
            accumulator.update(glob.iglob(wildcard))

        inc_files = []
        exc_files = []
        for path in accumulator:
            if self.is_file_excluded(path, exclude_wildcards):
                exc_files.append(path)
            else:
                inc_files.append(path)
        return (inc_files, exc_files)

    def get_included_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the included UNIX-style wildcards.
        While the check_names script is designed only for use on UNIX/macOS
        (due to nm), this function alone will work fine on Windows even with
        forward slashes in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns a List of relative filepaths.
        """
        accumulator = set()
        for include_wildcard in include_wildcards:
            accumulator.update(glob.iglob(include_wildcard))

        return [path for path in accumulator
                if not self.is_file_excluded(path, exclude_wildcards)]

    def parse_macros(self, include, exclude=None):
        """
        Parse all macros defined by #define preprocessor directives.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the found macros.
        """
        macro_regex = re.compile(r"# *define +(?P<macro>\w+)")
        # Macro names starting with any of these prefixes are not reported.
        exclusions = (
            "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
        )

        files = self.get_included_files(include, exclude)
        self.log.debug("Looking for macros in {} files".format(len(files)))

        macros = []
        for header_file in files:
            with open(header_file, "r", encoding="utf-8") as header:
                # Start at 1 so that reported line numbers match editors.
                for line_no, line in enumerate(header, 1):
                    for macro in macro_regex.finditer(line):
                        if macro.group("macro").startswith(exclusions):
                            continue

                        macros.append(Match(
                            header_file,
                            line,
                            line_no,
                            macro.span("macro"),
                            macro.group("macro")))

        return macros

    def parse_mbed_psa_words(self, include, exclude=None):
        """
        Parse all words in the file that begin with MBED|PSA, in and out of
        macros, comments, anything.

        Lines containing "//no-check-names" or "#error" are skipped entirely.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for words beginning with MBED|PSA.
        """
        # Typos of TLS are common, hence the broader check below than MBEDTLS.
        mbed_regex = re.compile(r"\b(MBED.+?|PSA)_[A-Z0-9_]*")
        exclusions = re.compile(r"// *no-check-names|#error")

        files = self.get_included_files(include, exclude)
        self.log.debug(
            "Looking for MBED|PSA words in {} files"
            .format(len(files))
        )

        mbed_psa_words = []
        for filename in files:
            with open(filename, "r", encoding="utf-8") as fp:
                # Start at 1 so that reported line numbers match editors.
                for line_no, line in enumerate(fp, 1):
                    if exclusions.search(line):
                        continue

                    for name in mbed_regex.finditer(line):
                        mbed_psa_words.append(Match(
                            filename,
                            line,
                            line_no,
                            name.span(0),
                            name.group(0)))

        return mbed_psa_words

    def parse_enum_consts(self, include, exclude=None):
        """
        Parse all enum value constants that are declared.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the findings.
        """
        files = self.get_included_files(include, exclude)
        self.log.debug("Looking for enum consts in {} files".format(len(files)))

        # Match typedefs and brackets only when they are at the
        # beginning of the line -- if they are indented, they might
        # be sub-structures within structs, etc.
        optional_c_identifier = r"([_a-zA-Z][_a-zA-Z0-9]*)?"
        enum_with_brace = re.compile(
            r"^(typedef +)?enum " + optional_c_identifier + r" *{")
        enum_keyword = re.compile(r"^(typedef +)?enum")
        opening_brace = re.compile(r"^{")
        closing_brace = re.compile(r"^}")
        preprocessor_line = re.compile(r"^ *#")
        enum_const_regex = re.compile(r"^ *(?P<enum_const>\w+)")

        # Emulate a finite state machine to parse enum declarations.
        # OUTSIDE_KEYWORD = outside the enum keyword
        # IN_BRACES = inside enum opening braces
        # IN_BETWEEN = between enum keyword and opening braces
        states = enum.Enum("FSM", ["OUTSIDE_KEYWORD", "IN_BRACES", "IN_BETWEEN"])
        enum_consts = []
        for header_file in files:
            state = states.OUTSIDE_KEYWORD
            with open(header_file, "r", encoding="utf-8") as header:
                # Start at 1 so that reported line numbers match editors.
                for line_no, line in enumerate(header, 1):
                    if (state == states.OUTSIDE_KEYWORD and
                            enum_with_brace.search(line)):
                        state = states.IN_BRACES
                    elif (state == states.OUTSIDE_KEYWORD and
                          enum_keyword.search(line)):
                        state = states.IN_BETWEEN
                    elif (state == states.IN_BETWEEN and
                          opening_brace.search(line)):
                        state = states.IN_BRACES
                    elif (state == states.IN_BRACES and
                          closing_brace.search(line)):
                        state = states.OUTSIDE_KEYWORD
                    elif (state == states.IN_BRACES and
                          not preprocessor_line.search(line)):
                        enum_const = enum_const_regex.search(line)
                        if not enum_const:
                            continue

                        enum_consts.append(Match(
                            header_file,
                            line,
                            line_no,
                            enum_const.span("enum_const"),
                            enum_const.group("enum_const")))

        return enum_consts

    IGNORED_CHUNK_REGEX = re.compile('|'.join([
        r'/\*.*?\*/', # block comment entirely on one line
        r'//.*', # line comment
        r'(?P<string>")(?:[^\\\"]|\\.)*"', # string literal
    ]))

    def strip_comments_and_literals(self, line, in_block_comment):
        """Strip comments and string literals from line.

        Continuation lines are not supported.

        If in_block_comment is true, assume that the line starts inside a
        block comment.

        Return updated values of (line, in_block_comment) where:
        * Comments in line have been replaced by a space (or nothing at the
          start or end of the line).
        * String contents have been removed.
        * in_block_comment indicates whether the line ends inside a block
          comment that continues on the next line.
        """

        # Terminate current multiline comment?
        if in_block_comment:
            m = re.search(r"\*/", line)
            if m:
                in_block_comment = False
                line = line[m.end(0):]
            else:
                return '', True

        # Remove full comments and string literals.
        # Do it all together to handle cases like "/*" correctly.
        # Note that continuation lines are not supported.
        line = self.IGNORED_CHUNK_REGEX.sub(
            lambda s: '""' if s.group('string') else ' ',
            line)

        # Start an unfinished comment?
        # (If `/*` was part of a complete comment, it's already been removed.)
        m = re.search(r"/\*", line)
        if m:
            in_block_comment = True
            line = line[:m.start(0)]

        return line, in_block_comment

    # Each alternative has exactly one capturing group: the identifier.
    IDENTIFIER_REGEX = re.compile('|'.join([
        # Match " something(a" or " *something(a". Functions.
        # Assumptions:
        # - function definition from return type to one of its arguments is
        #   all on one line
        # - function definition line only contains alphanumeric, asterisk,
        #   underscore, and open bracket
        r".* \**(\w+) *\( *\w",
        # Match "(*something)(".
        r".*\( *\* *(\w+) *\) *\(",
        # Match names of named data structures.
        r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$",
        # Match names of typedef instances, after closing bracket.
        r"}? *(\w+)[;[].*",
    ]))
    # The regex below is indented for clarity.
    EXCLUSION_LINES = re.compile("|".join([
        r"extern +\"C\"",
        r"(typedef +)?(struct|union|enum)( *{)?$",
        r"} *;?$",
        r"$",
        r"//",
        r"#",
    ]))

    def parse_identifiers_in_file(self, header_file, identifiers):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style.

        Append found matches to the list ``identifiers``. When a declaration
        was joined from several lines, the Match holds the joined text and the
        number of the last of those lines.
        """

        with open(header_file, "r", encoding="utf-8") as header:
            in_block_comment = False
            # The previous line variable is used for concatenating lines
            # when identifiers are formatted and spread across multiple
            # lines.
            previous_line = ""

            # Start at 1 so that reported line numbers match editors.
            for line_no, line in enumerate(header, 1):
                line, in_block_comment = \
                    self.strip_comments_and_literals(line, in_block_comment)

                if self.EXCLUSION_LINES.match(line):
                    previous_line = ""
                    continue

                # If the line contains only space-separated alphanumeric
                # characters (or underscore, asterisk, or open parenthesis),
                # and nothing else, high chance it's a declaration that
                # continues on the next line
                if re.search(r"^([\w\*\(]+\s+)+$", line):
                    previous_line += line
                    continue

                # If previous line seemed to start an unfinished declaration
                # (as above), concat and treat them as one.
                if previous_line:
                    line = previous_line.strip() + " " + line.strip() + "\n"
                    previous_line = ""

                # Skip parsing if line has a space in front = heuristic to
                # skip function argument lines (highly subject to formatting
                # changes)
                if line.startswith(" "):
                    continue

                identifier = self.IDENTIFIER_REGEX.search(line)

                if not identifier:
                    continue

                # Find the group that matched, and append it. Record the span
                # of that group so the position points at the identifier
                # itself rather than at the whole matched declaration.
                for index, group in enumerate(identifier.groups(), 1):
                    if not group:
                        continue

                    identifiers.append(Match(
                        header_file,
                        line,
                        line_no,
                        identifier.span(index),
                        group))

    def parse_identifiers(self, include, exclude=None):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style. Identifiers in excluded files are still
        parsed.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns: a Tuple of two Lists of Match objects with identifiers.
        * included_identifiers: A List of Match objects with identifiers from
          included files.
        * excluded_identifiers: A List of Match objects with identifiers from
          excluded files.
        """

        included_files, excluded_files = \
            self.get_all_files(include, exclude)

        self.log.debug(
            "Looking for included identifiers in {} files"
            .format(len(included_files))
        )

        included_identifiers = []
        excluded_identifiers = []
        for header_file in included_files:
            self.parse_identifiers_in_file(header_file, included_identifiers)
        for header_file in excluded_files:
            self.parse_identifiers_in_file(header_file, excluded_identifiers)

        return (included_identifiers, excluded_identifiers)

    def parse_symbols(self):
        """
        Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509
        object files using nm to retrieve the list of referenced symbols.
        Exceptions thrown here are rethrown because they would be critical
        errors that void several tests, and thus needs to halt the program. This
        is explicitly done for clarity.

        Side effects: temporarily switches include/mbedtls/config.h to the
        "full" configuration (restored afterwards), and runs "make clean"
        before and after building the libraries.

        Returns a List of symbols defined and used in the libraries.

        Raises subprocess.CalledProcessError if any of the commands fails.
        """
        self.log.info("Compiling...")
        symbols = []

        # Back up the config and atomically compile with the full configuration.
        shutil.copy(
            "include/mbedtls/config.h",
            "include/mbedtls/config.h.bak"
        )
        try:
            # Use check=True in all subprocess calls so that failures are raised
            # as exceptions and logged.
            subprocess.run(
                ["python3", "scripts/config.py", "full"],
                universal_newlines=True,
                check=True
            )
            my_environment = os.environ.copy()
            my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"
            # Run make clean separately to lib to prevent unwanted behavior when
            # make is invoked with parallelism.
            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
            subprocess.run(
                ["make", "lib"],
                env=my_environment,
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            )

            # Perform object file analysis using nm
            symbols = self.parse_symbols_from_nm([
                "library/libmbedcrypto.a",
                "library/libmbedtls.a",
                "library/libmbedx509.a"
            ])

            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
        except subprocess.CalledProcessError as error:
            # error.output is None for the commands whose output was not
            # captured above.
            self.log.debug(error.output)
            raise
        finally:
            # Put back the original config regardless of there being errors.
            # Works also for keyboard interrupts.
            shutil.move(
                "include/mbedtls/config.h.bak",
                "include/mbedtls/config.h"
            )

        return symbols

    def parse_symbols_from_nm(self, object_files):
        """
        Run nm to retrieve the list of referenced symbols in each object file.
        Does not return the position data since it is of no use.

        Args:
        * object_files: a List of compiled object filepaths to search through.

        Returns a List of symbols defined and used in any of the object
        files. The list is not de-duplicated.

        Raises subprocess.CalledProcessError if nm fails.
        """
        # Lines to ignore: undefined symbols, blank lines, archive headers.
        nm_undefined_regex = re.compile(r"^\S+: +U |^$|^\S+:$")
        nm_valid_regex = re.compile(r"^\S+( [0-9A-Fa-f]+)* . _*(?P<symbol>\w+)")
        exclusions = ("FStar", "Hacl")

        symbols = []

        # Handle each library's output on its own so that a missing final
        # newline cannot merge lines from two different archives.
        for lib in object_files:
            nm_output = subprocess.run(
                ["nm", "-og", lib],
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            ).stdout

            for line in nm_output.splitlines():
                if nm_undefined_regex.search(line):
                    continue

                symbol = nm_valid_regex.search(line)
                if (symbol and not symbol.group("symbol").startswith(exclusions)):
                    symbols.append(symbol.group("symbol"))
                else:
                    self.log.error(line)

        return symbols
766
class NameChecker():
    """
    Representation of the core name checking operation performed by this script.

    The checker works on a parse result, which is indexed by string keys. The
    keys read by this class are "symbols", "identifiers",
    "excluded_identifiers", "macros", "private_macros", "enum_consts" and
    "mbed_psa_words". Entries of "symbols" are compared directly against the
    .name attribute of the identifier entries; entries of every other group
    are objects exposing a .name attribute.
    """
    def __init__(self, parse_result, log):
        """
        Args:
        * parse_result: the parse result to run the checks on.
        * log: the logger used to report check results and problems.
        """
        self.parse_result = parse_result
        self.log = log

    def perform_checks(self, quiet=False):
        """
        A comprehensive checker that performs each check in order, and outputs
        a final verdict.

        Args:
        * quiet: whether to hide detailed problem explanation.

        Returns 1 if at least one problem was found, 0 otherwise, so that the
        value can be used as the exit code of the script.
        """
        self.log.info("=============")
        # The verbosity of every reported problem is controlled through this
        # class-level attribute of Problem.
        Problem.quiet = quiet
        problems = 0
        problems += self.check_symbols_declared_in_header()

        pattern_checks = [
            ("macros", MACRO_PATTERN),
            ("enum_consts", CONSTANTS_PATTERN),
            ("identifiers", IDENTIFIER_PATTERN)
        ]
        for group, check_pattern in pattern_checks:
            problems += self.check_match_pattern(group, check_pattern)

        problems += self.check_for_typos()

        self.log.info("=============")
        if problems > 0:
            self.log.info("FAIL: {0} problem(s) to fix".format(str(problems)))
            if quiet:
                self.log.info("Remove --quiet to see explanations.")
            else:
                self.log.info("Use --quiet for minimal output.")
            return 1
        else:
            self.log.info("PASS")
            return 0

    def check_symbols_declared_in_header(self):
        """
        Perform a check that all detected symbols in the library object files
        are properly declared in headers.
        Assumes parse_names_in_source() was called before this.

        Returns the number of problems that need fixing.
        """
        # Collect the declared names once, so that each symbol is looked up in
        # a set instead of rescanning the whole identifier list per symbol.
        declared_names = {
            identifier_match.name
            for identifier_match
            in self.parse_result["identifiers"] +
            self.parse_result["excluded_identifiers"]
        }

        # One problem per undeclared symbol, in the order the symbols appear.
        problems = [
            SymbolNotInHeader(symbol)
            for symbol in self.parse_result["symbols"]
            if symbol not in declared_names
        ]

        self.output_check_result("All symbols in header", problems)
        return len(problems)

    def check_match_pattern(self, group_to_check, check_pattern):
        """
        Perform a check that all items of a group conform to a regex pattern.
        Assumes parse_names_in_source() was called before this.

        An item can be reported twice: once for not matching the pattern, and
        once for containing a double underscore.

        Args:
        * group_to_check: string key to index into self.parse_result.
        * check_pattern: the regex to check against.

        Returns the number of problems that need fixing.
        """
        problems = []

        for item_match in self.parse_result[group_to_check]:
            if not re.search(check_pattern, item_match.name):
                problems.append(PatternMismatch(check_pattern, item_match))
            # Double underscore should not be used for names
            if "__" in item_match.name:
                problems.append(
                    PatternMismatch("no double underscore allowed", item_match))

        self.output_check_result(
            "Naming patterns of {}".format(group_to_check),
            problems)
        return len(problems)

    def check_for_typos(self):
        """
        Perform a check that all words in the source code beginning with MBED
        or PSA are either defined as macros, or as enum constants.
        Assumes parse_names_in_source() was called before this.

        Returns the number of problems that need fixing.
        """
        problems = []

        # Set comprehension, equivalent to a list comprehension wrapped by set()
        all_caps_names = {
            match.name
            for match
            in self.parse_result["macros"] +
            self.parse_result["private_macros"] +
            self.parse_result["enum_consts"]
        }
        # Words matching this pattern are never reported as typos.
        # NOTE(review): the trailing "*" of MBEDTLS_TEST_LIBTESTDRIVER* is a
        # regex quantifier applied to the final "R", not a glob. Because
        # search() matches anywhere in the word, every name containing
        # MBEDTLS_TEST_LIBTESTDRIVER is still excluded, so it is left as is.
        typo_exclusion = re.compile(r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$|"
                                    r"MBEDTLS_TEST_LIBTESTDRIVER*|"
                                    r"PSA_CRYPTO_DRIVER_TEST")

        for name_match in self.parse_result["mbed_psa_words"]:
            found = name_match.name in all_caps_names

            # Since MBEDTLS_PSA_ACCEL_XXX defines are defined by the
            # PSA driver, they will not exist as macros. However, they
            # should still be checked for typos using the equivalent
            # BUILTINs that exist.
            if "MBEDTLS_PSA_ACCEL_" in name_match.name:
                found = name_match.name.replace(
                    "MBEDTLS_PSA_ACCEL_",
                    "MBEDTLS_PSA_BUILTIN_") in all_caps_names

            if not found and not typo_exclusion.search(name_match.name):
                problems.append(Typo(name_match))

        self.output_check_result("Likely typos", problems)
        return len(problems)

    def output_check_result(self, name, problems):
        """
        Write out the PASS/FAIL status of a performed check depending on whether
        there were problems.

        The status line is logged at info level; each problem is logged at
        warning level using its string representation.

        Args:
        * name: the name of the test
        * problems: a List of encountered Problems
        """
        if problems:
            self.log.info("{}: FAIL\n".format(name))
            for problem in problems:
                self.log.warning(str(problem))
        else:
            self.log.info("{}: PASS".format(name))
915
def main():
    """
    Entry point of the script: read the command line options, set up the
    logger, then run CodeParser followed by NameChecker.

    The process exits with the value returned by NameChecker.perform_checks(),
    or with 2 if the parsing stage raised an exception (the traceback is
    printed in that case).
    """
    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=(
            "This script confirms that the naming of all symbols and identifiers "
            "in Mbed TLS are consistent with the house style and are also "
            "self-consistent.\n\n"
            "Expected to be run from the MbedTLS root directory.")
    )
    arg_parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="show parse results"
    )
    arg_parser.add_argument(
        "-q", "--quiet",
        action="store_true",
        help="hide unnecessary text, explanations, and highlights"
    )
    options = arg_parser.parse_args()

    # The root logger is configured here and handed to both classes below.
    log_level = logging.INFO
    if options.verbose:
        log_level = logging.DEBUG
    log = logging.getLogger()
    log.setLevel(log_level)
    log.addHandler(logging.StreamHandler())

    # Any failure while parsing is reported with its traceback and a
    # dedicated exit code, distinct from the checker's 0/1 verdict.
    try:
        parse_result = CodeParser(log).comprehensive_parse()
    except Exception: # pylint: disable=broad-except
        traceback.print_exc()
        sys.exit(2)

    verdict = NameChecker(parse_result, log).perform_checks(quiet=options.quiet)
    sys.exit(verdict)

if __name__ == "__main__":
    main()