#!/usr/bin/env python3
2#
3# Copyright The Mbed TLS Contributors
4# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
5
6"""
7This script confirms that the naming of all symbols and identifiers in Mbed TLS
8are consistent with the house style and are also self-consistent. It only runs
9on Linux and macOS since it depends on nm.
10
11It contains three major Python classes, TFPSACryptoCodeParser,
12MBEDTLSCodeParser and NameChecker. They all have a comprehensive "run-all"
13function (comprehensive_parse() and perform_checks()) but the individual
14functions can also be used for specific needs.
15
CodeParser (a shared base class for TFPSACryptoCodeParser and MBEDTLSCodeParser)
17makes heavy use of regular expressions to parse the code, and is dependent on
18the current code formatting. Many Python C parser libraries require
19preprocessed C code, which means no macro parsing. Compiler tools are also not
20very helpful when we want the exact location in the original source (which
21becomes impossible when e.g. comments are stripped).
22
23NameChecker performs the following checks:
24
25- All exported and available symbols in the library object files, are explicitly
26 declared in the header files. This uses the nm command.
27- All macros, constants, and identifiers (function names, struct names, etc)
28 follow the required regex pattern.
29- Typo checking: All words that begin with MBED|PSA exist as macros or constants.
30
31The script returns 0 on success, 1 on test failure, and 2 if there is a script
32error. It must be run from Mbed TLS root.
33"""
34
35import abc
36import argparse
37import fnmatch
38import glob
39import textwrap
40import os
41import sys
42import traceback
43import re
44import enum
45import shutil
46import subprocess
47import logging
48import tempfile
49
50import project_scripts # pylint: disable=unused-import
51from mbedtls_framework import build_tree
52
53
# Naming patterns to check against. These are defined outside the NameCheck
# class for ease of modification.
# Public macros must carry a product prefix and must end in an uppercase
# letter or digit (i.e. no trailing underscore).
PUBLIC_MACRO_PATTERN = r"^(MBEDTLS|PSA|TF_PSA)_[0-9A-Z_]*[0-9A-Z]$"
# Internal macros may be mixed-case but must still end in a digit or an
# uppercase letter.
INTERNAL_MACRO_PATTERN = r"^[0-9A-Za-z_]*[0-9A-Z]$"
# Enum constants follow the same naming rule as public macros.
CONSTANTS_PATTERN = PUBLIC_MACRO_PATTERN
# Identifiers (functions, types, ...) are lowercase with an mbedtls_/psa_
# prefix and must not end in an underscore.
IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$"
60
class Match(): # pylint: disable=too-few-public-methods
    """
    One occurrence of a parsed name, with enough context to report it.

    Fields:
    * filename: the file that the match was in.
    * line: the full line containing the match.
    * line_no: the line number (0-based, as supplied by callers).
    * pos: a tuple of (start, end) positions on the line where the match is.
    * name: the match itself.
    """
    def __init__(self, filename, line, line_no, pos, name):
        # pylint: disable=too-many-arguments
        self.name = name
        self.pos = pos
        self.line_no = line_no
        self.line = line
        self.filename = filename

    def __str__(self):
        """
        Render the matched line as a code listing, with the match underlined.
        """
        gutter = "{:4d}".format(self.line_no)
        blank_gutter = " " * len(gutter)
        start, end = self.pos
        underline = " " * start + "^" * (end - start)

        top = " {} |\n".format(blank_gutter)
        middle = " {} | {}".format(gutter, self.line)
        bottom = " {} | {}\n".format(blank_gutter, underline)
        return top + middle + bottom
92
class Problem(abc.ABC): # pylint: disable=too-few-public-methods
    """
    Abstract parent class for every static-analysis finding this script can
    report. Being an Abstract Base Class, it cannot be instantiated directly;
    concrete subclasses must implement both output methods below.
    """
    # Class variable to control the quietness of all problems
    quiet = False

    def __init__(self):
        # Wrap verbose explanations at 80 columns, indented as a quote block.
        wrapper = textwrap.TextWrapper()
        wrapper.width = 80
        wrapper.initial_indent = "    > "
        wrapper.subsequent_indent = "    "
        self.textwrapper = wrapper

    def __str__(self):
        """
        Unified string representation method for all Problems.
        """
        return self.quiet_output() if self.__class__.quiet \
            else self.verbose_output()

    @abc.abstractmethod
    def quiet_output(self):
        """
        The output when --quiet is enabled.
        """

    @abc.abstractmethod
    def verbose_output(self):
        """
        The default output with explanation and code snippet if appropriate.
        """
128
class SymbolNotInHeader(Problem): # pylint: disable=too-few-public-methods
    """
    Finding for an exported/available symbol in the object file that is not
    explicitly declared in header files. Created with
    NameCheck.check_symbols_declared_in_header()

    Fields:
    * symbol_name: the name of the symbol.
    """
    def __init__(self, symbol_name):
        self.symbol_name = symbol_name
        super().__init__()

    def quiet_output(self):
        return "{}".format(self.symbol_name)

    def verbose_output(self):
        message = (
            "'{0}' was found as an available symbol in the output of nm, "
            "however it was not declared in any header files."
        ).format(self.symbol_name)
        return self.textwrapper.fill(message)
150
class PatternMismatch(Problem): # pylint: disable=too-few-public-methods
    """
    Finding for a name that does not match the expected regex pattern.
    Created with NameCheck.check_match_pattern()

    Fields:
    * pattern: the expected regex pattern
    * match: the Match object in question
    """
    def __init__(self, pattern, match):
        self.pattern = pattern
        self.match = match
        super().__init__()

    def quiet_output(self):
        return "{}:{}:{}".format(
            self.match.filename, self.match.line_no, self.match.name)

    def verbose_output(self):
        explanation = self.textwrapper.fill(
            "{0}:{1}: '{2}' does not match the required pattern '{3}'."
            .format(self.match.filename,
                    self.match.line_no,
                    self.match.name,
                    self.pattern))
        return explanation + "\n" + str(self.match)
182
class Typo(Problem): # pylint: disable=too-few-public-methods
    """
    Finding for a word using MBED or PSA that does not appear to be defined
    as a macro nor as an enum value, and therefore looks like a typo.
    Created with NameCheck.check_for_typos()

    Fields:
    * match: the Match object of the MBED|PSA name in question.
    """
    def __init__(self, match):
        self.match = match
        super().__init__()

    def quiet_output(self):
        return "{}:{}:{}".format(
            self.match.filename, self.match.line_no, self.match.name)

    def verbose_output(self):
        explanation = self.textwrapper.fill(
            "{0}:{1}: '{2}' looks like a typo. It was not found in any "
            "macros or any enums. If this is not a typo, put "
            "//no-check-names after it."
            .format(self.match.filename, self.match.line_no, self.match.name))
        return explanation + "\n" + str(self.match)
209
class CodeParser():
    """
    Class for retrieving files and parsing the code. This can be used
    independently of the checks that NameChecker performs, for example for
    list_internal_identifiers.py.
    """
    def __init__(self, log):
        self.log = log
        if not build_tree.looks_like_root(os.getcwd()):
            raise Exception("This script must be run from Mbed TLS or TF-PSA-Crypto root")

        # Memo for storing "glob expression": set(filepaths)
        self.files = {}

        # Globally excluded filenames.
        # Note that "*" can match directory separators in exclude lists.
        self.excluded_files = ["*/bn_mul", "*/compat-2.x.h"]

    def _parse(self, all_macros, enum_consts, identifiers,
               excluded_identifiers, mbed_psa_words, symbols):
        # pylint: disable=too-many-arguments
        """
        Parse macros, enums, identifiers, excluded identifiers, Mbed PSA word and Symbols.

        Returns a dict of parsed item key to the corresponding List of Matches.
        """

        self.log.info("Parsing source code...")
        self.log.debug(
            "The following files are excluded from the search: {}"
            .format(str(self.excluded_files))
        )

        # Remove identifier macros like mbedtls_printf or mbedtls_calloc.
        # Use a set so that membership tests in the loop below are O(1)
        # instead of scanning the identifier list for every macro.
        identifiers_justname = {identifier.name for identifier in identifiers}
        actual_macros = {"public": [], "internal": []}
        for scope in actual_macros:
            for macro in all_macros[scope]:
                if macro.name not in identifiers_justname:
                    actual_macros[scope].append(macro)

        self.log.debug("Found:")
        # Aligns the counts on the assumption that none exceeds 4 digits
        for scope in actual_macros:
            self.log.debug("  {:4} Total {} Macros"
                           .format(len(all_macros[scope]), scope))
            self.log.debug("  {:4} {} Non-identifier Macros"
                           .format(len(actual_macros[scope]), scope))
        self.log.debug("  {:4} Enum Constants".format(len(enum_consts)))
        self.log.debug("  {:4} Identifiers".format(len(identifiers)))
        self.log.debug("  {:4} Exported Symbols".format(len(symbols)))
        return {
            "public_macros": actual_macros["public"],
            "internal_macros": actual_macros["internal"],
            "private_macros": all_macros["private"],
            "enum_consts": enum_consts,
            "identifiers": identifiers,
            "excluded_identifiers": excluded_identifiers,
            "symbols": symbols,
            "mbed_psa_words": mbed_psa_words
        }

    def is_file_excluded(self, path, exclude_wildcards):
        """Whether the given file path is excluded."""
        # exclude_wildcards may be None. Also, consider the global exclusions.
        exclude_wildcards = (exclude_wildcards or []) + self.excluded_files
        for pattern in exclude_wildcards:
            if fnmatch.fnmatch(path, pattern):
                return True
        return False

    def get_all_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the included UNIX-style wildcards
        and filter them into included and excluded lists.
        While the check_names script is designed only for use on UNIX/macOS
        (due to nm), this function alone will work fine on Windows even with
        forward slashes in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns:
        * inc_files: A List of relative filepaths for included files.
        * exc_files: A List of relative filepaths for excluded files.
        """
        accumulator = set()
        all_wildcards = include_wildcards + (exclude_wildcards or [])
        for wildcard in all_wildcards:
            accumulator = accumulator.union(glob.iglob(wildcard))

        inc_files = []
        exc_files = []
        for path in accumulator:
            if self.is_file_excluded(path, exclude_wildcards):
                exc_files.append(path)
            else:
                inc_files.append(path)
        return (inc_files, exc_files)

    def get_included_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the included UNIX-style wildcards.
        While the check_names script is designed only for use on UNIX/macOS
        (due to nm), this function alone will work fine on Windows even with
        forward slashes in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns a List of relative filepaths.
        """
        accumulator = set()

        for include_wildcard in include_wildcards:
            accumulator = accumulator.union(glob.iglob(include_wildcard))

        return list(path for path in accumulator
                    if not self.is_file_excluded(path, exclude_wildcards))

    def parse_macros(self, include, exclude=None):
        """
        Parse all macros defined by #define preprocessor directives.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the found macros.
        """
        macro_regex = re.compile(r"# *define +(?P<macro>\w+)")
        exclusions = (
            "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
        )

        files = self.get_included_files(include, exclude)
        self.log.debug("Looking for macros in {} files".format(len(files)))

        macros = []
        for header_file in files:
            with open(header_file, "r", encoding="utf-8") as header:
                # NOTE: line_no is 0-based (enumerate default); Match reports
                # it as-is, so listings are offset by one from editor lines.
                for line_no, line in enumerate(header):
                    for macro in macro_regex.finditer(line):
                        if macro.group("macro").startswith(exclusions):
                            continue

                        macros.append(Match(
                            header_file,
                            line,
                            line_no,
                            macro.span("macro"),
                            macro.group("macro")))

        return macros

    def parse_mbed_psa_words(self, include, exclude=None):
        """
        Parse all words in the file that begin with MBED|PSA, in and out of
        macros, comments, anything.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for words beginning with MBED|PSA.
        """
        # Typos of TLS are common, hence the broader check below than MBEDTLS.
        mbed_regex = re.compile(r"\b(MBED.+?|PSA)_[A-Z0-9_]*")
        # Lines flagged //no-check-names (deliberate) or #error (freeform
        # message text) are not checked for typos.
        exclusions = re.compile(r"// *no-check-names|#error")

        files = self.get_included_files(include, exclude)
        self.log.debug(
            "Looking for MBED|PSA words in {} files"
            .format(len(files))
        )

        mbed_psa_words = []
        for filename in files:
            with open(filename, "r", encoding="utf-8") as fp:
                for line_no, line in enumerate(fp):
                    if exclusions.search(line):
                        continue

                    for name in mbed_regex.finditer(line):
                        mbed_psa_words.append(Match(
                            filename,
                            line,
                            line_no,
                            name.span(0),
                            name.group(0)))

        return mbed_psa_words

    def parse_enum_consts(self, include, exclude=None):
        """
        Parse all enum value constants that are declared.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the findings.
        """
        files = self.get_included_files(include, exclude)
        self.log.debug("Looking for enum consts in {} files".format(len(files)))

        # Emulate a finite state machine to parse enum declarations.
        # OUTSIDE_KEYWORD = outside the enum keyword
        # IN_BRACES = inside enum opening braces
        # IN_BETWEEN = between enum keyword and opening braces
        states = enum.Enum("FSM", ["OUTSIDE_KEYWORD", "IN_BRACES", "IN_BETWEEN"])
        enum_consts = []
        for header_file in files:
            state = states.OUTSIDE_KEYWORD
            with open(header_file, "r", encoding="utf-8") as header:
                for line_no, line in enumerate(header):
                    # Match typedefs and brackets only when they are at the
                    # beginning of the line -- if they are indented, they might
                    # be sub-structures within structs, etc.
                    optional_c_identifier = r"([_a-zA-Z][_a-zA-Z0-9]*)?"
                    if (state == states.OUTSIDE_KEYWORD and
                            re.search(r"^(typedef +)?enum " + \
                                    optional_c_identifier + \
                                    r" *{", line)):
                        state = states.IN_BRACES
                    elif (state == states.OUTSIDE_KEYWORD and
                          re.search(r"^(typedef +)?enum", line)):
                        state = states.IN_BETWEEN
                    elif (state == states.IN_BETWEEN and
                          re.search(r"^{", line)):
                        state = states.IN_BRACES
                    elif (state == states.IN_BRACES and
                          re.search(r"^}", line)):
                        state = states.OUTSIDE_KEYWORD
                    elif (state == states.IN_BRACES and
                          not re.search(r"^ *#", line)):
                        enum_const = re.search(r"^ *(?P<enum_const>\w+)", line)
                        if not enum_const:
                            continue

                        enum_consts.append(Match(
                            header_file,
                            line,
                            line_no,
                            enum_const.span("enum_const"),
                            enum_const.group("enum_const")))

        return enum_consts

    IGNORED_CHUNK_REGEX = re.compile('|'.join([
        r'/\*.*?\*/', # block comment entirely on one line
        r'//.*', # line comment
        r'(?P<string>")(?:[^\\\"]|\\.)*"', # string literal
    ]))

    def strip_comments_and_literals(self, line, in_block_comment):
        """Strip comments and string literals from line.

        Continuation lines are not supported.

        If in_block_comment is true, assume that the line starts inside a
        block comment.

        Return updated values of (line, in_block_comment) where:
        * Comments in line have been replaced by a space (or nothing at the
          start or end of the line).
        * String contents have been removed.
        * in_block_comment indicates whether the line ends inside a block
          comment that continues on the next line.
        """

        # Terminate current multiline comment?
        if in_block_comment:
            m = re.search(r"\*/", line)
            if m:
                in_block_comment = False
                line = line[m.end(0):]
            else:
                return '', True

        # Remove full comments and string literals.
        # Do it all together to handle cases like "/*" correctly.
        # Note that continuation lines are not supported.
        line = re.sub(self.IGNORED_CHUNK_REGEX,
                      lambda s: '""' if s.group('string') else ' ',
                      line)

        # Start an unfinished comment?
        # (If `/*` was part of a complete comment, it's already been removed.)
        m = re.search(r"/\*", line)
        if m:
            in_block_comment = True
            line = line[:m.start(0)]

        return line, in_block_comment

    IDENTIFIER_REGEX = re.compile('|'.join([
        # Match " something(a" or " *something(a". Functions.
        # Assumptions:
        # - function definition from return type to one of its arguments is
        #   all on one line
        # - function definition line only contains alphanumeric, asterisk,
        #   underscore, and open bracket
        r".* \**(\w+) *\( *\w",
        # Match "(*something)(".
        r".*\( *\* *(\w+) *\) *\(",
        # Match names of named data structures.
        r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$",
        # Match names of typedef instances, after closing bracket.
        r"}? *(\w+)[;[].*",
    ]))
    # The regex below is indented for clarity.
    EXCLUSION_LINES = re.compile("|".join([
        r"extern +\"C\"",
        r"(typedef +)?(struct|union|enum)( *{)?$",
        r"} *;?$",
        r"$",
        r"//",
        r"#",
    ]))

    def parse_identifiers_in_file(self, header_file, identifiers):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style.

        Append found matches to the list ``identifiers``.
        """

        with open(header_file, "r", encoding="utf-8") as header:
            in_block_comment = False
            # The previous line variable is used for concatenating lines
            # when identifiers are formatted and spread across multiple
            # lines.
            previous_line = ""

            for line_no, line in enumerate(header):
                line, in_block_comment = \
                    self.strip_comments_and_literals(line, in_block_comment)

                if self.EXCLUSION_LINES.match(line):
                    previous_line = ""
                    continue

                # If the line contains only space-separated alphanumeric
                # characters (or underscore, asterisk, or open parenthesis),
                # and nothing else, high chance it's a declaration that
                # continues on the next line
                if re.search(r"^([\w\*\(]+\s+)+$", line):
                    previous_line += line
                    continue

                # If previous line seemed to start an unfinished declaration
                # (as above), concat and treat them as one.
                if previous_line:
                    line = previous_line.strip() + " " + line.strip() + "\n"
                    previous_line = ""

                # Skip parsing if line has a space in front = heuristic to
                # skip function argument lines (highly subject to formatting
                # changes)
                if line[0] == " ":
                    continue

                identifier = self.IDENTIFIER_REGEX.search(line)

                if not identifier:
                    continue

                # Find the group that matched, and append it
                for group in identifier.groups():
                    if not group:
                        continue

                    identifiers.append(Match(
                        header_file,
                        line,
                        line_no,
                        identifier.span(),
                        group))

    def parse_identifiers(self, include, exclude=None):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style. Identifiers in excluded files are still
        parsed

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns: a Tuple of two Lists of Match objects with identifiers.
        * included_identifiers: A List of Match objects with identifiers from
          included files.
        * excluded_identifiers: A List of Match objects with identifiers from
          excluded files.
        """

        included_files, excluded_files = \
            self.get_all_files(include, exclude)

        self.log.debug("Looking for included identifiers in {} files".format \
            (len(included_files)))

        included_identifiers = []
        excluded_identifiers = []
        for header_file in included_files:
            self.parse_identifiers_in_file(header_file, included_identifiers)
        for header_file in excluded_files:
            self.parse_identifiers_in_file(header_file, excluded_identifiers)

        return (included_identifiers, excluded_identifiers)

    def parse_symbols(self):
        """
        Compile a library, and parse the object files using nm to retrieve the
        list of referenced symbols. Exceptions thrown here are rethrown because
        they would be critical errors that void several tests, and thus needs
        to halt the program. This is explicitly done for clarity.

        Returns a List of unique symbols defined and used in the libraries.
        """
        raise NotImplementedError("parse_symbols must be implemented by a code parser")

    def comprehensive_parse(self):
        """
        Comprehensive ("default") function to call each parsing function and
        retrieve various elements of the code, together with the source location.
        Subclasses must override this.

        Returns a dict of parsed item key to the corresponding List of Matches.
        """
        # Fixed message: it previously said "comprehension_parse", which is
        # not the name of this method.
        raise NotImplementedError("comprehensive_parse must be implemented by a code parser")

    def parse_symbols_from_nm(self, object_files):
        """
        Run nm to retrieve the list of referenced symbols in each object file.
        Does not return the position data since it is of no use.

        Args:
        * object_files: a List of compiled object filepaths to search through.

        Returns a List of unique symbols defined and used in any of the object
        files.
        """
        # Lines reporting undefined symbols, blank lines and per-file headers
        # are not errors; anything else that fails to parse is logged.
        nm_undefined_regex = re.compile(r"^\S+: +U |^$|^\S+:$")
        nm_valid_regex = re.compile(r"^\S+( [0-9A-Fa-f]+)* . _*(?P<symbol>\w+)")
        exclusions = ("FStar", "Hacl")
        symbols = []
        # Gather all outputs of nm
        nm_output = ""
        for lib in object_files:
            nm_output += subprocess.run(
                ["nm", "-og", lib],
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            ).stdout
        for line in nm_output.splitlines():
            if not nm_undefined_regex.search(line):
                symbol = nm_valid_regex.search(line)
                if (symbol and not symbol.group("symbol").startswith(exclusions)):
                    symbols.append(symbol.group("symbol"))
                else:
                    self.log.error(line)
        return symbols
681
class TFPSACryptoCodeParser(CodeParser):
    """
    Class for retrieving files and parsing TF-PSA-Crypto code. This can be used
    independently of the checks that NameChecker performs.
    """

    def __init__(self, log):
        super().__init__(log)
        if not build_tree.looks_like_tf_psa_crypto_root(os.getcwd()):
            raise Exception("This script must be run from TF-PSA-Crypto root.")

    def comprehensive_parse(self):
        """
        Comprehensive ("default") function to call each parsing function and
        retrieve various elements of the code, together with the source location.

        Returns a dict of parsed item key to the corresponding List of Matches.
        """
        all_macros = {"public": [], "internal": [], "private":[]}
        all_macros["public"] = self.parse_macros([
            "include/psa/*.h",
            "include/tf-psa-crypto/*.h",
            "drivers/builtin/include/mbedtls/*.h",
            "drivers/everest/include/everest/everest.h",
            "drivers/everest/include/everest/x25519.h"
        ])
        all_macros["internal"] = self.parse_macros([
            "core/*.h",
            "drivers/builtin/src/*.h",
            "framework/tests/include/test/drivers/*.h",
        ])
        all_macros["private"] = self.parse_macros([
            "core/*.c",
            "drivers/builtin/src/*.c",
        ])
        enum_consts = self.parse_enum_consts([
            "include/psa/*.h",
            "include/tf-psa-crypto/*.h",
            "drivers/builtin/include/mbedtls/*.h",
            "core/*.h",
            "drivers/builtin/src/*.h",
            "core/*.c",
            "drivers/builtin/src/*.c",
            "drivers/everest/include/everest/everest.h",
            "drivers/everest/include/everest/x25519.h"
        ])
        identifiers, excluded_identifiers = self.parse_identifiers([
            "include/psa/*.h",
            "include/tf-psa-crypto/*.h",
            "drivers/builtin/include/mbedtls/*.h",
            "core/*.h",
            "drivers/builtin/src/*.h",
            "drivers/everest/include/everest/everest.h",
            "drivers/everest/include/everest/x25519.h"
        ], ["drivers/p256-m/p256-m/p256-m.h"])
        mbed_psa_words = self.parse_mbed_psa_words([
            "include/psa/*.h",
            "include/tf-psa-crypto/*.h",
            "drivers/builtin/include/mbedtls/*.h",
            "core/*.h",
            "drivers/builtin/src/*.h",
            "drivers/everest/include/everest/everest.h",
            "drivers/everest/include/everest/x25519.h",
            "core/*.c",
            "drivers/builtin/src/*.c",
            "drivers/everest/library/everest.c",
            "drivers/everest/library/x25519.c"
        ], ["core/psa_crypto_driver_wrappers.h"])
        symbols = self.parse_symbols()

        return self._parse(all_macros, enum_consts, identifiers,
                           excluded_identifiers, mbed_psa_words, symbols)

    def parse_symbols(self):
        """
        Compile the TF-PSA-Crypto libraries, and parse the
        object files using nm to retrieve the list of referenced symbols.
        Exceptions thrown here are rethrown because they would be critical
        errors that void several tests, and thus needs to halt the program. This
        is explicitly done for clarity.

        Returns a List of unique symbols defined and used in the libraries.
        """
        self.log.info("Compiling...")
        symbols = []

        # Record the source directory up front so the cleanup code can always
        # return to it, even when a build step fails after os.chdir() below.
        source_dir = os.getcwd()
        build_dir = None

        # Back up the config and atomically compile with the full configuration.
        shutil.copy(
            "include/psa/crypto_config.h",
            "include/psa/crypto_config.h.bak"
        )
        try:
            # Use check=True in all subprocess calls so that failures are raised
            # as exceptions and logged.
            subprocess.run(
                ["python3", "scripts/config.py", "full"],
                universal_newlines=True,
                check=True
            )
            my_environment = os.environ.copy()
            my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"

            build_dir = tempfile.mkdtemp()
            os.chdir(build_dir)
            subprocess.run(
                ["cmake", "-DGEN_FILES=ON", source_dir],
                universal_newlines=True,
                check=True
            )
            subprocess.run(
                ["make"],
                env=my_environment,
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            )

            # Perform object file analysis using nm
            symbols = self.parse_symbols_from_nm([
                build_dir + "/drivers/builtin/libbuiltin.a",
                build_dir + "/drivers/p256-m/libp256m.a",
                build_dir + "/drivers/everest/libeverest.a",
                build_dir + "/core/libtfpsacrypto.a"
            ])
        except subprocess.CalledProcessError as error:
            self.log.debug(error.output)
            raise error
        finally:
            # Restore the working directory and remove the temporary build
            # tree even on failure. Previously both steps were skipped when a
            # build command raised, which leaked the temp dir and made the
            # config restore below run with relative paths from the wrong
            # directory.
            os.chdir(source_dir)
            if build_dir is not None:
                shutil.rmtree(build_dir, ignore_errors=True)
            # Put back the original config regardless of there being errors.
            # Works also for keyboard interrupts.
            shutil.move(
                "include/psa/crypto_config.h.bak",
                "include/psa/crypto_config.h"
            )

        return symbols
823
class MBEDTLSCodeParser(CodeParser):
    """
    Class for retrieving files and parsing Mbed TLS code. This can be used
    independently of the checks that NameChecker performs.
    """

    def __init__(self, log):
        super().__init__(log)
        if not build_tree.looks_like_mbedtls_root(os.getcwd()):
            raise Exception("This script must be run from Mbed TLS root.")

    def comprehensive_parse(self):
        """
        Comprehensive ("default") function to call each parsing function and
        retrieve various elements of the code, together with the source location.

        Returns a dict of parsed item key to the corresponding List of Matches.
        """
        all_macros = {"public": [], "internal": [], "private":[]}
        # TF-PSA-Crypto is in the same repo in 3.6 so initialise variable here.
        tf_psa_crypto_parse_result = {}

        if build_tree.is_mbedtls_3_6():
            all_macros["public"] = self.parse_macros([
                "include/mbedtls/*.h",
                "include/psa/*.h",
                "3rdparty/everest/include/everest/everest.h",
                "3rdparty/everest/include/everest/x25519.h"
            ])
            all_macros["internal"] = self.parse_macros([
                "library/*.h",
                "framework/tests/include/test/drivers/*.h",
            ])
            all_macros["private"] = self.parse_macros([
                "library/*.c",
            ])
            enum_consts = self.parse_enum_consts([
                "include/mbedtls/*.h",
                "include/psa/*.h",
                "library/*.h",
                "library/*.c",
                "3rdparty/everest/include/everest/everest.h",
                "3rdparty/everest/include/everest/x25519.h"
            ])
            identifiers, excluded_identifiers = self.parse_identifiers([
                "include/mbedtls/*.h",
                "include/psa/*.h",
                "library/*.h",
                "3rdparty/everest/include/everest/everest.h",
                "3rdparty/everest/include/everest/x25519.h"
            ], ["3rdparty/p256-m/p256-m/p256-m.h"])
            mbed_psa_words = self.parse_mbed_psa_words([
                "include/mbedtls/*.h",
                "include/psa/*.h",
                "library/*.h",
                "3rdparty/everest/include/everest/everest.h",
                "3rdparty/everest/include/everest/x25519.h",
                "library/*.c",
                "3rdparty/everest/library/everest.c",
                "3rdparty/everest/library/x25519.c"
            ], ["library/psa_crypto_driver_wrappers.h"])
        else:
            all_macros["public"] = self.parse_macros([
                "include/mbedtls/*.h",
            ])
            all_macros["internal"] = self.parse_macros([
                "library/*.h",
                "framework/tests/include/test/drivers/*.h",
            ])
            all_macros["private"] = self.parse_macros([
                "library/*.c",
            ])
            enum_consts = self.parse_enum_consts([
                "include/mbedtls/*.h",
                "library/*.h",
                "library/*.c",
            ])
            identifiers, excluded_identifiers = self.parse_identifiers([
                "include/mbedtls/*.h",
                "library/*.h",
            ])
            mbed_psa_words = self.parse_mbed_psa_words([
                "include/mbedtls/*.h",
                "library/*.h",
                "library/*.c",
            ])
            # Parse the TF-PSA-Crypto sub-tree with its own parser. Restore
            # the working directory even if parsing fails, so that the caller
            # (and the symbol parsing below) never runs from the wrong
            # directory.
            os.chdir("./tf-psa-crypto")
            try:
                tf_psa_crypto_code_parser = TFPSACryptoCodeParser(self.log)
                tf_psa_crypto_parse_result = \
                    tf_psa_crypto_code_parser.comprehensive_parse()
            finally:
                os.chdir("../")

        symbols = self.parse_symbols()
        mbedtls_parse_result = self._parse(all_macros, enum_consts,
                                           identifiers, excluded_identifiers,
                                           mbed_psa_words, symbols)
        # Combine results for Mbed TLS and TF-PSA-Crypto
        for key in tf_psa_crypto_parse_result:
            mbedtls_parse_result[key] += tf_psa_crypto_parse_result[key]
        return mbedtls_parse_result

    def parse_symbols(self):
        """
        Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509
        object files using nm to retrieve the list of referenced symbols.
        Exceptions thrown here are rethrown because they would be critical
        errors that void several tests, and thus needs to halt the program. This
        is explicitly done for clarity.

        Returns a List of unique symbols defined and used in the libraries.
        """
        self.log.info("Compiling...")
        symbols = []

        # Back up the config and atomically compile with the full configuration.
        shutil.copy(
            "include/mbedtls/mbedtls_config.h",
            "include/mbedtls/mbedtls_config.h.bak"
        )
        try:
            # Use check=True in all subprocess calls so that failures are raised
            # as exceptions and logged.
            subprocess.run(
                ["python3", "scripts/config.py", "full"],
                universal_newlines=True,
                check=True
            )
            my_environment = os.environ.copy()
            my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"
            # Run make clean separately to lib to prevent unwanted behavior when
            # make is invoked with parallelism.
            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
            subprocess.run(
                ["make", "lib"],
                env=my_environment,
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            )

            # Perform object file analysis using nm
            symbols = self.parse_symbols_from_nm([
                "library/libmbedcrypto.a",
                "library/libmbedtls.a",
                "library/libmbedx509.a"
            ])

            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
        except subprocess.CalledProcessError as error:
            self.log.debug(error.output)
            raise error
        finally:
            # Put back the original config regardless of there being errors.
            # Works also for keyboard interrupts.
            shutil.move(
                "include/mbedtls/mbedtls_config.h.bak",
                "include/mbedtls/mbedtls_config.h"
            )

        return symbols
993
class NameChecker():
    """
    Representation of the core name checking operation performed by this script.

    Consumes the dictionary produced by a code parser's comprehensive_parse()
    (keys: "symbols", "identifiers", "excluded_identifiers", "public_macros",
    "internal_macros", "private_macros", "enum_consts", "mbed_psa_words") and
    reports naming problems through the supplied logger.
    """
    def __init__(self, parse_result, log):
        self.parse_result = parse_result
        self.log = log

    def perform_checks(self, quiet=False):
        """
        A comprehensive checker that performs each check in order, and outputs
        a final verdict.

        Args:
        * quiet: whether to hide detailed problem explanation.

        Returns 0 if all checks pass, 1 otherwise.
        """
        self.log.info("=============")
        Problem.quiet = quiet
        problems = 0
        problems += self.check_symbols_declared_in_header()

        pattern_checks = [
            ("public_macros", PUBLIC_MACRO_PATTERN),
            ("internal_macros", INTERNAL_MACRO_PATTERN),
            ("enum_consts", CONSTANTS_PATTERN),
            ("identifiers", IDENTIFIER_PATTERN)
        ]
        for group, check_pattern in pattern_checks:
            problems += self.check_match_pattern(group, check_pattern)

        problems += self.check_for_typos()

        self.log.info("=============")
        if problems > 0:
            self.log.info("FAIL: {0} problem(s) to fix".format(problems))
            if quiet:
                self.log.info("Remove --quiet to see explanations.")
            else:
                self.log.info("Use --quiet for minimal output.")
            return 1
        else:
            self.log.info("PASS")
            return 0

    def check_symbols_declared_in_header(self):
        """
        Perform a check that all detected symbols in the library object files
        are properly declared in headers.
        Assumes parse_names_in_source() was called before this.

        Returns the number of problems that need fixing.
        """
        # Build the set of declared names once so each symbol lookup is O(1),
        # instead of rescanning the whole identifier list for every symbol.
        declared_names = {
            identifier_match.name
            for identifier_match in (self.parse_result["identifiers"] +
                                     self.parse_result["excluded_identifiers"])
        }

        problems = [
            SymbolNotInHeader(symbol)
            for symbol in self.parse_result["symbols"]
            if symbol not in declared_names
        ]

        self.output_check_result("All symbols in header", problems)
        return len(problems)

    def check_match_pattern(self, group_to_check, check_pattern):
        """
        Perform a check that all items of a group conform to a regex pattern.
        Assumes parse_names_in_source() was called before this.

        Args:
        * group_to_check: string key to index into self.parse_result.
        * check_pattern: the regex to check against.

        Returns the number of problems that need fixing.
        """
        problems = []

        for item_match in self.parse_result[group_to_check]:
            if not re.search(check_pattern, item_match.name):
                problems.append(PatternMismatch(check_pattern, item_match))
            # Double underscore should not be used for names (reserved for
            # the implementation in C).
            if "__" in item_match.name:
                problems.append(
                    PatternMismatch("no double underscore allowed", item_match))

        self.output_check_result(
            "Naming patterns of {}".format(group_to_check),
            problems)
        return len(problems)

    def check_for_typos(self):
        """
        Perform a check that all words in the source code beginning with MBED are
        either defined as macros, or as enum constants.
        Assumes parse_names_in_source() was called before this.

        Returns the number of problems that need fixing.
        """
        problems = []

        # Set comprehension, equivalent to a list comprehension wrapped by set()
        all_caps_names = {
            match.name
            for match
            in self.parse_result["public_macros"] +
            self.parse_result["internal_macros"] +
            self.parse_result["private_macros"] +
            self.parse_result["enum_consts"]
        }
        # NOTE(review): "MBEDTLS_TEST_LIBTESTDRIVER*" makes only the final
        # "R" optional/repeatable; "MBEDTLS_TEST_LIBTESTDRIVER.*" was
        # probably intended. Harmless in practice since re.search matches
        # the "...LIBTESTDRIVE" substring anywhere — confirm upstream
        # before changing.
        typo_exclusion = re.compile(r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$|"
                                    r"MBEDTLS_TEST_LIBTESTDRIVER*|"
                                    r"PSA_CRYPTO_DRIVER_TEST")

        for name_match in self.parse_result["mbed_psa_words"]:
            found = name_match.name in all_caps_names

            # Since MBEDTLS_PSA_ACCEL_XXX defines are defined by the
            # PSA driver, they will not exist as macros. However, they
            # should still be checked for typos using the equivalent
            # BUILTINs that exist.
            if "MBEDTLS_PSA_ACCEL_" in name_match.name:
                found = name_match.name.replace(
                    "MBEDTLS_PSA_ACCEL_",
                    "MBEDTLS_PSA_BUILTIN_") in all_caps_names

            if not found and not typo_exclusion.search(name_match.name):
                problems.append(Typo(name_match))

        self.output_check_result("Likely typos", problems)
        return len(problems)

    def output_check_result(self, name, problems):
        """
        Write out the PASS/FAIL status of a performed check depending on whether
        there were problems.

        Args:
        * name: the name of the test
        * problems: a List of encountered Problems
        """
        if problems:
            self.log.info("{}: FAIL\n".format(name))
            for problem in problems:
                self.log.warning(str(problem))
        else:
            self.log.info("{}: PASS".format(name))
1145
def main():
    """
    Entry point: parse command-line options, build the code parser matching
    the current source tree, and run the name checks on its results.

    Exits with the NameChecker verdict (0/1), or 2 on a script error.
    """
    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=(
            "This script confirms that the naming of all symbols and identifiers "
            "in Mbed TLS are consistent with the house style and are also "
            "self-consistent.\n\n"
            "Expected to be run from the Mbed TLS root directory.")
    )
    arg_parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="show parse results"
    )
    arg_parser.add_argument(
        "-q", "--quiet",
        action="store_true",
        help="hide unnecessary text, explanations, and highlights"
    )
    options = arg_parser.parse_args()

    # One shared root logger is handed to the parser and checker classes.
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG if options.verbose else logging.INFO)
    logger.addHandler(logging.StreamHandler())

    try:
        current_dir = os.getcwd()
        if build_tree.looks_like_tf_psa_crypto_root(current_dir):
            parse_result = TFPSACryptoCodeParser(logger).comprehensive_parse()
        elif build_tree.looks_like_mbedtls_root(current_dir):
            # Mbed TLS uses TF-PSA-Crypto, so we need to parse TF-PSA-Crypto too
            parse_result = MBEDTLSCodeParser(logger).comprehensive_parse()
        else:
            raise Exception("This script must be run from Mbed TLS or TF-PSA-Crypto root")
    except Exception: # pylint: disable=broad-except
        traceback.print_exc()
        sys.exit(2)

    name_checker = NameChecker(parse_result, logger)
    sys.exit(name_checker.perform_checks(quiet=options.quiet))
1195
# Run the checks only when executed as a script, not when imported.
if __name__ == "__main__":
    main()