#!/usr/bin/env python3
2#
3# Copyright The Mbed TLS Contributors
4# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
5
6"""
7This script confirms that the naming of all symbols and identifiers in Mbed TLS
8are consistent with the house style and are also self-consistent. It only runs
9on Linux and macOS since it depends on nm.
10
11It contains three major Python classes, TFPSACryptoCodeParser,
12MBEDTLSCodeParser and NameChecker. They all have a comprehensive "run-all"
13function (comprehensive_parse() and perform_checks()) but the individual
14functions can also be used for specific needs.
15
CodeParser (a shared base class for TFPSACryptoCodeParser and MBEDTLSCodeParser)
17makes heavy use of regular expressions to parse the code, and is dependent on
18the current code formatting. Many Python C parser libraries require
19preprocessed C code, which means no macro parsing. Compiler tools are also not
20very helpful when we want the exact location in the original source (which
21becomes impossible when e.g. comments are stripped).
22
23NameChecker performs the following checks:
24
25- All exported and available symbols in the library object files, are explicitly
26 declared in the header files. This uses the nm command.
27- All macros, constants, and identifiers (function names, struct names, etc)
28 follow the required regex pattern.
29- Typo checking: All words that begin with MBED|PSA exist as macros or constants.
30
31The script returns 0 on success, 1 on test failure, and 2 if there is a script
32error. It must be run from Mbed TLS root.
33"""
34
35import abc
36import argparse
37import fnmatch
38import glob
39import textwrap
40import os
41import sys
42import traceback
43import re
44import enum
45import shutil
46import subprocess
47import logging
48import tempfile
49
50import project_scripts # pylint: disable=unused-import
51from mbedtls_framework import build_tree
52
53
# Naming patterns to check against. These are defined outside the NameCheck
# class for ease of modification.
# Public macros must carry a product prefix and must end in an uppercase
# letter or digit (i.e. no trailing underscore).
PUBLIC_MACRO_PATTERN = r"^(MBEDTLS|PSA|TF_PSA)_[0-9A-Z_]*[0-9A-Z]$"
# Internal macros may be mixed-case but must still end in a digit or an
# uppercase letter.
INTERNAL_MACRO_PATTERN = r"^[0-9A-Za-z_]*[0-9A-Z]$"
# Enum constants follow the same naming rule as public macros.
CONSTANTS_PATTERN = PUBLIC_MACRO_PATTERN
# Identifiers (functions, types, ...) are lowercase with an mbedtls_/psa_
# prefix and must not end in an underscore.
IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$"
60
class Match(): # pylint: disable=too-few-public-methods
    """
    One occurrence of a parsed name, with enough context to report it.

    Fields:
    * filename: the file that the match was in.
    * line: the full line containing the match.
    * line_no: the line number (0-based, as supplied by callers).
    * pos: a tuple of (start, end) positions on the line where the match is.
    * name: the match itself.
    """
    def __init__(self, filename, line, line_no, pos, name):
        # pylint: disable=too-many-arguments
        self.name = name
        self.pos = pos
        self.line_no = line_no
        self.line = line
        self.filename = filename

    def __str__(self):
        """
        Render the matched line as a code listing, with the match underlined.
        """
        gutter = "{:4d}".format(self.line_no)
        blank_gutter = " " * len(gutter)
        start, end = self.pos
        underline = " " * start + "^" * (end - start)

        top = " {} |\n".format(blank_gutter)
        middle = " {} | {}".format(gutter, self.line)
        bottom = " {} | {}\n".format(blank_gutter, underline)
        return top + middle + bottom
92
class Problem(abc.ABC): # pylint: disable=too-few-public-methods
    """
    Abstract parent class for every static-analysis finding this script can
    report. Being an Abstract Base Class, it cannot be instantiated directly;
    concrete subclasses must implement both output methods below.
    """
    # Class variable to control the quietness of all problems
    quiet = False

    def __init__(self):
        # Wrap verbose explanations at 80 columns, indented as a quote block.
        wrapper = textwrap.TextWrapper()
        wrapper.width = 80
        wrapper.initial_indent = "    > "
        wrapper.subsequent_indent = "    "
        self.textwrapper = wrapper

    def __str__(self):
        """
        Unified string representation method for all Problems.
        """
        return self.quiet_output() if self.__class__.quiet \
            else self.verbose_output()

    @abc.abstractmethod
    def quiet_output(self):
        """
        The output when --quiet is enabled.
        """

    @abc.abstractmethod
    def verbose_output(self):
        """
        The default output with explanation and code snippet if appropriate.
        """
128
class SymbolNotInHeader(Problem): # pylint: disable=too-few-public-methods
    """
    Finding for an exported/available symbol in the object file that is not
    explicitly declared in header files. Created with
    NameCheck.check_symbols_declared_in_header()

    Fields:
    * symbol_name: the name of the symbol.
    """
    def __init__(self, symbol_name):
        self.symbol_name = symbol_name
        super().__init__()

    def quiet_output(self):
        return "{}".format(self.symbol_name)

    def verbose_output(self):
        message = (
            "'{0}' was found as an available symbol in the output of nm, "
            "however it was not declared in any header files."
        ).format(self.symbol_name)
        return self.textwrapper.fill(message)
150
class PatternMismatch(Problem): # pylint: disable=too-few-public-methods
    """
    Finding for a name that does not match the expected regex pattern.
    Created with NameCheck.check_match_pattern()

    Fields:
    * pattern: the expected regex pattern
    * match: the Match object in question
    """
    def __init__(self, pattern, match):
        self.pattern = pattern
        self.match = match
        super().__init__()

    def quiet_output(self):
        return "{}:{}:{}".format(
            self.match.filename, self.match.line_no, self.match.name)

    def verbose_output(self):
        explanation = self.textwrapper.fill(
            "{0}:{1}: '{2}' does not match the required pattern '{3}'."
            .format(self.match.filename,
                    self.match.line_no,
                    self.match.name,
                    self.pattern))
        return explanation + "\n" + str(self.match)
182
class Typo(Problem): # pylint: disable=too-few-public-methods
    """
    Finding for a word using MBED or PSA that does not appear to be defined
    as a macro nor as an enum value, and therefore looks like a typo.
    Created with NameCheck.check_for_typos()

    Fields:
    * match: the Match object of the MBED|PSA name in question.
    """
    def __init__(self, match):
        self.match = match
        super().__init__()

    def quiet_output(self):
        return "{}:{}:{}".format(
            self.match.filename, self.match.line_no, self.match.name)

    def verbose_output(self):
        explanation = self.textwrapper.fill(
            "{0}:{1}: '{2}' looks like a typo. It was not found in any "
            "macros or any enums. If this is not a typo, put "
            "//no-check-names after it."
            .format(self.match.filename, self.match.line_no, self.match.name))
        return explanation + "\n" + str(self.match)
209
class CodeParser():
    """
    Class for retrieving files and parsing the code. This can be used
    independently of the checks that NameChecker performs, for example for
    list_internal_identifiers.py.
    """
    def __init__(self, log):
        self.log = log
        if not build_tree.looks_like_root(os.getcwd()):
            raise Exception("This script must be run from Mbed TLS or TF-PSA-Crypto root")

        # Memo for storing "glob expression": set(filepaths)
        self.files = {}

        # Globally excluded filenames.
        # Note that "*" can match directory separators in exclude lists.
        self.excluded_files = ["*/bn_mul", "*/compat-2.x.h"]

    def _parse(self, all_macros, enum_consts, identifiers,
               excluded_identifiers, mbed_psa_words, symbols):
        # pylint: disable=too-many-arguments
        """
        Parse macros, enums, identifiers, excluded identifiers, Mbed PSA word and Symbols.

        Returns a dict of parsed item key to the corresponding List of Matches.
        """

        self.log.info("Parsing source code...")
        self.log.debug(
            "The following files are excluded from the search: {}"
            .format(str(self.excluded_files))
        )

        # Remove identifier macros like mbedtls_printf or mbedtls_calloc.
        # Use a set so that membership tests in the loop below are O(1)
        # instead of scanning the identifier list for every macro.
        identifiers_justname = {identifier.name for identifier in identifiers}
        actual_macros = {"public": [], "internal": []}
        for scope in actual_macros:
            for macro in all_macros[scope]:
                if macro.name not in identifiers_justname:
                    actual_macros[scope].append(macro)

        self.log.debug("Found:")
        # Aligns the counts on the assumption that none exceeds 4 digits
        for scope in actual_macros:
            self.log.debug("  {:4} Total {} Macros"
                           .format(len(all_macros[scope]), scope))
            self.log.debug("  {:4} {} Non-identifier Macros"
                           .format(len(actual_macros[scope]), scope))
        self.log.debug("  {:4} Enum Constants".format(len(enum_consts)))
        self.log.debug("  {:4} Identifiers".format(len(identifiers)))
        self.log.debug("  {:4} Exported Symbols".format(len(symbols)))
        return {
            "public_macros": actual_macros["public"],
            "internal_macros": actual_macros["internal"],
            "private_macros": all_macros["private"],
            "enum_consts": enum_consts,
            "identifiers": identifiers,
            "excluded_identifiers": excluded_identifiers,
            "symbols": symbols,
            "mbed_psa_words": mbed_psa_words
        }

    def is_file_excluded(self, path, exclude_wildcards):
        """Whether the given file path is excluded."""
        # exclude_wildcards may be None. Also, consider the global exclusions.
        exclude_wildcards = (exclude_wildcards or []) + self.excluded_files
        for pattern in exclude_wildcards:
            if fnmatch.fnmatch(path, pattern):
                return True
        return False

    def get_all_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the included UNIX-style wildcards
        and filter them into included and excluded lists.
        While the check_names script is designed only for use on UNIX/macOS
        (due to nm), this function alone will work fine on Windows even with
        forward slashes in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns:
        * inc_files: A List of relative filepaths for included files.
        * exc_files: A List of relative filepaths for excluded files.
        """
        accumulator = set()
        all_wildcards = include_wildcards + (exclude_wildcards or [])
        for wildcard in all_wildcards:
            accumulator = accumulator.union(glob.iglob(wildcard))

        inc_files = []
        exc_files = []
        for path in accumulator:
            if self.is_file_excluded(path, exclude_wildcards):
                exc_files.append(path)
            else:
                inc_files.append(path)
        return (inc_files, exc_files)

    def get_included_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the included UNIX-style wildcards.
        While the check_names script is designed only for use on UNIX/macOS
        (due to nm), this function alone will work fine on Windows even with
        forward slashes in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns a List of relative filepaths.
        """
        accumulator = set()

        for include_wildcard in include_wildcards:
            accumulator = accumulator.union(glob.iglob(include_wildcard))

        return list(path for path in accumulator
                    if not self.is_file_excluded(path, exclude_wildcards))

    def parse_macros(self, include, exclude=None):
        """
        Parse all macros defined by #define preprocessor directives.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the found macros.
        """
        macro_regex = re.compile(r"# *define +(?P<macro>\w+)")
        exclusions = (
            "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
        )

        files = self.get_included_files(include, exclude)
        self.log.debug("Looking for macros in {} files".format(len(files)))

        macros = []
        for header_file in files:
            with open(header_file, "r", encoding="utf-8") as header:
                # NOTE: line_no is 0-based (enumerate default); Match reports
                # it as-is, so listings are offset by one from editor lines.
                for line_no, line in enumerate(header):
                    for macro in macro_regex.finditer(line):
                        if macro.group("macro").startswith(exclusions):
                            continue

                        macros.append(Match(
                            header_file,
                            line,
                            line_no,
                            macro.span("macro"),
                            macro.group("macro")))

        return macros

    def parse_mbed_psa_words(self, include, exclude=None):
        """
        Parse all words in the file that begin with MBED|PSA, in and out of
        macros, comments, anything.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for words beginning with MBED|PSA.
        """
        # Typos of TLS are common, hence the broader check below than MBEDTLS.
        mbed_regex = re.compile(r"\b(MBED.+?|PSA)_[A-Z0-9_]*")
        # Lines flagged //no-check-names (deliberate) or #error (freeform
        # message text) are not checked for typos.
        exclusions = re.compile(r"// *no-check-names|#error")

        files = self.get_included_files(include, exclude)
        self.log.debug(
            "Looking for MBED|PSA words in {} files"
            .format(len(files))
        )

        mbed_psa_words = []
        for filename in files:
            with open(filename, "r", encoding="utf-8") as fp:
                for line_no, line in enumerate(fp):
                    if exclusions.search(line):
                        continue

                    for name in mbed_regex.finditer(line):
                        mbed_psa_words.append(Match(
                            filename,
                            line,
                            line_no,
                            name.span(0),
                            name.group(0)))

        return mbed_psa_words

    def parse_enum_consts(self, include, exclude=None):
        """
        Parse all enum value constants that are declared.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the findings.
        """
        files = self.get_included_files(include, exclude)
        self.log.debug("Looking for enum consts in {} files".format(len(files)))

        # Emulate a finite state machine to parse enum declarations.
        # OUTSIDE_KEYWORD = outside the enum keyword
        # IN_BRACES = inside enum opening braces
        # IN_BETWEEN = between enum keyword and opening braces
        states = enum.Enum("FSM", ["OUTSIDE_KEYWORD", "IN_BRACES", "IN_BETWEEN"])
        enum_consts = []
        for header_file in files:
            state = states.OUTSIDE_KEYWORD
            with open(header_file, "r", encoding="utf-8") as header:
                for line_no, line in enumerate(header):
                    # Match typedefs and brackets only when they are at the
                    # beginning of the line -- if they are indented, they might
                    # be sub-structures within structs, etc.
                    optional_c_identifier = r"([_a-zA-Z][_a-zA-Z0-9]*)?"
                    if (state == states.OUTSIDE_KEYWORD and
                            re.search(r"^(typedef +)?enum " + \
                                    optional_c_identifier + \
                                    r" *{", line)):
                        state = states.IN_BRACES
                    elif (state == states.OUTSIDE_KEYWORD and
                          re.search(r"^(typedef +)?enum", line)):
                        state = states.IN_BETWEEN
                    elif (state == states.IN_BETWEEN and
                          re.search(r"^{", line)):
                        state = states.IN_BRACES
                    elif (state == states.IN_BRACES and
                          re.search(r"^}", line)):
                        state = states.OUTSIDE_KEYWORD
                    elif (state == states.IN_BRACES and
                          not re.search(r"^ *#", line)):
                        enum_const = re.search(r"^ *(?P<enum_const>\w+)", line)
                        if not enum_const:
                            continue

                        enum_consts.append(Match(
                            header_file,
                            line,
                            line_no,
                            enum_const.span("enum_const"),
                            enum_const.group("enum_const")))

        return enum_consts

    IGNORED_CHUNK_REGEX = re.compile('|'.join([
        r'/\*.*?\*/', # block comment entirely on one line
        r'//.*', # line comment
        r'(?P<string>")(?:[^\\\"]|\\.)*"', # string literal
    ]))

    def strip_comments_and_literals(self, line, in_block_comment):
        """Strip comments and string literals from line.

        Continuation lines are not supported.

        If in_block_comment is true, assume that the line starts inside a
        block comment.

        Return updated values of (line, in_block_comment) where:
        * Comments in line have been replaced by a space (or nothing at the
          start or end of the line).
        * String contents have been removed.
        * in_block_comment indicates whether the line ends inside a block
          comment that continues on the next line.
        """

        # Terminate current multiline comment?
        if in_block_comment:
            m = re.search(r"\*/", line)
            if m:
                in_block_comment = False
                line = line[m.end(0):]
            else:
                return '', True

        # Remove full comments and string literals.
        # Do it all together to handle cases like "/*" correctly.
        # Note that continuation lines are not supported.
        line = re.sub(self.IGNORED_CHUNK_REGEX,
                      lambda s: '""' if s.group('string') else ' ',
                      line)

        # Start an unfinished comment?
        # (If `/*` was part of a complete comment, it's already been removed.)
        m = re.search(r"/\*", line)
        if m:
            in_block_comment = True
            line = line[:m.start(0)]

        return line, in_block_comment

    IDENTIFIER_REGEX = re.compile('|'.join([
        # Match " something(a" or " *something(a". Functions.
        # Assumptions:
        # - function definition from return type to one of its arguments is
        #   all on one line
        # - function definition line only contains alphanumeric, asterisk,
        #   underscore, and open bracket
        r".* \**(\w+) *\( *\w",
        # Match "(*something)(".
        r".*\( *\* *(\w+) *\) *\(",
        # Match names of named data structures.
        r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$",
        # Match names of typedef instances, after closing bracket.
        r"}? *(\w+)[;[].*",
    ]))
    # The regex below is indented for clarity.
    EXCLUSION_LINES = re.compile("|".join([
        r"extern +\"C\"",
        r"(typedef +)?(struct|union|enum)( *{)?$",
        r"} *;?$",
        r"$",
        r"//",
        r"#",
    ]))

    def parse_identifiers_in_file(self, header_file, identifiers):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style.

        Append found matches to the list ``identifiers``.
        """

        with open(header_file, "r", encoding="utf-8") as header:
            in_block_comment = False
            # The previous line variable is used for concatenating lines
            # when identifiers are formatted and spread across multiple
            # lines.
            previous_line = ""

            for line_no, line in enumerate(header):
                line, in_block_comment = \
                    self.strip_comments_and_literals(line, in_block_comment)

                if self.EXCLUSION_LINES.match(line):
                    previous_line = ""
                    continue

                # If the line contains only space-separated alphanumeric
                # characters (or underscore, asterisk, or open parenthesis),
                # and nothing else, high chance it's a declaration that
                # continues on the next line
                if re.search(r"^([\w\*\(]+\s+)+$", line):
                    previous_line += line
                    continue

                # If previous line seemed to start an unfinished declaration
                # (as above), concat and treat them as one.
                if previous_line:
                    line = previous_line.strip() + " " + line.strip() + "\n"
                    previous_line = ""

                # Skip parsing if line has a space in front = heuristic to
                # skip function argument lines (highly subject to formatting
                # changes)
                if line[0] == " ":
                    continue

                identifier = self.IDENTIFIER_REGEX.search(line)

                if not identifier:
                    continue

                # Find the group that matched, and append it
                for group in identifier.groups():
                    if not group:
                        continue

                    identifiers.append(Match(
                        header_file,
                        line,
                        line_no,
                        identifier.span(),
                        group))

    def parse_identifiers(self, include, exclude=None):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style. Identifiers in excluded files are still
        parsed

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns: a Tuple of two Lists of Match objects with identifiers.
        * included_identifiers: A List of Match objects with identifiers from
          included files.
        * excluded_identifiers: A List of Match objects with identifiers from
          excluded files.
        """

        included_files, excluded_files = \
            self.get_all_files(include, exclude)

        self.log.debug("Looking for included identifiers in {} files".format \
            (len(included_files)))

        included_identifiers = []
        excluded_identifiers = []
        for header_file in included_files:
            self.parse_identifiers_in_file(header_file, included_identifiers)
        for header_file in excluded_files:
            self.parse_identifiers_in_file(header_file, excluded_identifiers)

        return (included_identifiers, excluded_identifiers)

    def parse_symbols(self):
        """
        Compile a library, and parse the object files using nm to retrieve the
        list of referenced symbols. Exceptions thrown here are rethrown because
        they would be critical errors that void several tests, and thus needs
        to halt the program. This is explicitly done for clarity.

        Returns a List of unique symbols defined and used in the libraries.
        """
        raise NotImplementedError("parse_symbols must be implemented by a code parser")

    def comprehensive_parse(self):
        """
        Comprehensive ("default") function to call each parsing function and
        retrieve various elements of the code, together with the source location.
        Subclasses must override this.

        Returns a dict of parsed item key to the corresponding List of Matches.
        """
        # Fixed message: it previously said "comprehension_parse", which is
        # not the name of this method.
        raise NotImplementedError("comprehensive_parse must be implemented by a code parser")

    def parse_symbols_from_nm(self, object_files):
        """
        Run nm to retrieve the list of referenced symbols in each object file.
        Does not return the position data since it is of no use.

        Args:
        * object_files: a List of compiled object filepaths to search through.

        Returns a List of unique symbols defined and used in any of the object
        files.
        """
        # Lines reporting undefined symbols, blank lines and per-file headers
        # are not errors; anything else that fails to parse is logged.
        nm_undefined_regex = re.compile(r"^\S+: +U |^$|^\S+:$")
        nm_valid_regex = re.compile(r"^\S+( [0-9A-Fa-f]+)* . _*(?P<symbol>\w+)")
        exclusions = ("FStar", "Hacl")
        symbols = []
        # Gather all outputs of nm
        nm_output = ""
        for lib in object_files:
            nm_output += subprocess.run(
                ["nm", "-og", lib],
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            ).stdout
        for line in nm_output.splitlines():
            if not nm_undefined_regex.search(line):
                symbol = nm_valid_regex.search(line)
                if (symbol and not symbol.group("symbol").startswith(exclusions)):
                    symbols.append(symbol.group("symbol"))
                else:
                    self.log.error(line)
        return symbols
681
class TFPSACryptoCodeParser(CodeParser):
    """
    Class for retrieving files and parsing TF-PSA-Crypto code. This can be used
    independently of the checks that NameChecker performs.
    """

    def __init__(self, log):
        super().__init__(log)
        if not build_tree.looks_like_tf_psa_crypto_root(os.getcwd()):
            raise Exception("This script must be run from TF-PSA-Crypto root.")

    def comprehensive_parse(self):
        """
        Comprehensive ("default") function to call each parsing function and
        retrieve various elements of the code, together with the source location.

        Returns a dict of parsed item key to the corresponding List of Matches.
        """
        all_macros = {"public": [], "internal": [], "private":[]}
        all_macros["public"] = self.parse_macros([
            "include/psa/*.h",
            "include/tf-psa-crypto/*.h",
            "drivers/builtin/include/mbedtls/*.h",
            "drivers/everest/include/everest/everest.h",
            "drivers/everest/include/everest/x25519.h"
        ])
        all_macros["internal"] = self.parse_macros([
            "core/*.h",
            "drivers/builtin/src/*.h",
            "framework/tests/include/test/drivers/*.h",
        ])
        all_macros["private"] = self.parse_macros([
            "core/*.c",
            "drivers/builtin/src/*.c",
        ])
        enum_consts = self.parse_enum_consts([
            "include/psa/*.h",
            "include/tf-psa-crypto/*.h",
            "drivers/builtin/include/mbedtls/*.h",
            "core/*.h",
            "drivers/builtin/src/*.h",
            "core/*.c",
            "drivers/builtin/src/*.c",
            "drivers/everest/include/everest/everest.h",
            "drivers/everest/include/everest/x25519.h"
        ])
        identifiers, excluded_identifiers = self.parse_identifiers([
            "include/psa/*.h",
            "include/tf-psa-crypto/*.h",
            "drivers/builtin/include/mbedtls/*.h",
            "core/*.h",
            "drivers/builtin/src/*.h",
            "drivers/everest/include/everest/everest.h",
            "drivers/everest/include/everest/x25519.h"
        ], ["drivers/p256-m/p256-m/p256-m.h"])
        mbed_psa_words = self.parse_mbed_psa_words([
            "include/psa/*.h",
            "include/tf-psa-crypto/*.h",
            "drivers/builtin/include/mbedtls/*.h",
            "core/*.h",
            "drivers/builtin/src/*.h",
            "drivers/everest/include/everest/everest.h",
            "drivers/everest/include/everest/x25519.h",
            "core/*.c",
            "drivers/builtin/src/*.c",
            "drivers/everest/library/everest.c",
            "drivers/everest/library/x25519.c"
        ], ["core/psa_crypto_driver_wrappers.h"])
        symbols = self.parse_symbols()

        return self._parse(all_macros, enum_consts, identifiers,
                           excluded_identifiers, mbed_psa_words, symbols)

    def parse_symbols(self):
        """
        Compile the TF-PSA-Crypto libraries, and parse the
        object files using nm to retrieve the list of referenced symbols.
        Exceptions thrown here are rethrown because they would be critical
        errors that void several tests, and thus needs to halt the program. This
        is explicitly done for clarity.

        Returns a List of unique symbols defined and used in the libraries.
        """
        self.log.info("Compiling...")
        symbols = []

        # Record the source directory up front so the cleanup code can always
        # return to it, even when a build step fails after os.chdir() below.
        source_dir = os.getcwd()
        build_dir = None

        # Back up the config and atomically compile with the full configuration.
        shutil.copy(
            "include/psa/crypto_config.h",
            "include/psa/crypto_config.h.bak"
        )
        try:
            # Use check=True in all subprocess calls so that failures are raised
            # as exceptions and logged.
            subprocess.run(
                ["python3", "scripts/config.py", "full"],
                universal_newlines=True,
                check=True
            )
            my_environment = os.environ.copy()
            my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"

            build_dir = tempfile.mkdtemp()
            os.chdir(build_dir)
            subprocess.run(
                ["cmake", "-DGEN_FILES=ON", source_dir],
                universal_newlines=True,
                check=True
            )
            subprocess.run(
                ["make"],
                env=my_environment,
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            )

            # Perform object file analysis using nm
            symbols = self.parse_symbols_from_nm([
                build_dir + "/drivers/builtin/libbuiltin.a",
                build_dir + "/drivers/p256-m/libp256m.a",
                build_dir + "/drivers/everest/libeverest.a",
                build_dir + "/core/libtfpsacrypto.a"
            ])
        except subprocess.CalledProcessError as error:
            self.log.debug(error.output)
            raise error
        finally:
            # Restore the working directory and remove the temporary build
            # tree even on failure. Previously both steps were skipped when a
            # build command raised, which leaked the temp dir and made the
            # config restore below run with relative paths from the wrong
            # directory.
            os.chdir(source_dir)
            if build_dir is not None:
                shutil.rmtree(build_dir, ignore_errors=True)
            # Put back the original config regardless of there being errors.
            # Works also for keyboard interrupts.
            shutil.move(
                "include/psa/crypto_config.h.bak",
                "include/psa/crypto_config.h"
            )

        return symbols
823
class MBEDTLSCodeParser(CodeParser):
    """
    Class for retrieving files and parsing Mbed TLS code. This can be used
    independently of the checks that NameChecker performs.
    """

    def __init__(self, log):
        super().__init__(log)
        if not build_tree.looks_like_mbedtls_root(os.getcwd()):
            raise Exception("This script must be run from Mbed TLS root.")

    def comprehensive_parse(self):
        """
        Comprehensive ("default") function to call each parsing function and
        retrieve various elements of the code, together with the source location.

        Returns a dict of parsed item key to the corresponding List of Matches.
        """
        all_macros = {"public": [], "internal": [], "private":[]}
        # TF-PSA-Crypto is in the same repo in 3.6 so initialise variable here.
        tf_psa_crypto_parse_result = {}

        if build_tree.is_mbedtls_3_6():
            all_macros["public"] = self.parse_macros([
                "include/mbedtls/*.h",
                "include/psa/*.h",
                "3rdparty/everest/include/everest/everest.h",
                "3rdparty/everest/include/everest/x25519.h"
            ])
            all_macros["internal"] = self.parse_macros([
                "library/*.h",
                "framework/tests/include/test/drivers/*.h",
            ])
            all_macros["private"] = self.parse_macros([
                "library/*.c",
            ])
            enum_consts = self.parse_enum_consts([
                "include/mbedtls/*.h",
                "include/psa/*.h",
                "library/*.h",
                "library/*.c",
                "3rdparty/everest/include/everest/everest.h",
                "3rdparty/everest/include/everest/x25519.h"
            ])
            identifiers, excluded_identifiers = self.parse_identifiers([
                "include/mbedtls/*.h",
                "include/psa/*.h",
                "library/*.h",
                "3rdparty/everest/include/everest/everest.h",
                "3rdparty/everest/include/everest/x25519.h"
            ], ["3rdparty/p256-m/p256-m/p256-m.h"])
            mbed_psa_words = self.parse_mbed_psa_words([
                "include/mbedtls/*.h",
                "include/psa/*.h",
                "library/*.h",
                "3rdparty/everest/include/everest/everest.h",
                "3rdparty/everest/include/everest/x25519.h",
                "library/*.c",
                "3rdparty/everest/library/everest.c",
                "3rdparty/everest/library/x25519.c"
            ], ["library/psa_crypto_driver_wrappers.h"])
        else:
            all_macros["public"] = self.parse_macros([
                "include/mbedtls/*.h",
            ])
            all_macros["internal"] = self.parse_macros([
                "library/*.h",
                "framework/tests/include/test/drivers/*.h",
            ])
            all_macros["private"] = self.parse_macros([
                "library/*.c",
            ])
            enum_consts = self.parse_enum_consts([
                "include/mbedtls/*.h",
                "library/*.h",
                "library/*.c",
            ])
            identifiers, excluded_identifiers = self.parse_identifiers([
                "include/mbedtls/*.h",
                "library/*.h",
            ])
            mbed_psa_words = self.parse_mbed_psa_words([
                "include/mbedtls/*.h",
                "library/*.h",
                "library/*.c",
            ])
            # Parse the TF-PSA-Crypto sub-tree with its own parser. Restore
            # the working directory even if parsing fails, so that the caller
            # (and the symbol parsing below) never runs from the wrong
            # directory.
            os.chdir("./tf-psa-crypto")
            try:
                tf_psa_crypto_code_parser = TFPSACryptoCodeParser(self.log)
                tf_psa_crypto_parse_result = \
                    tf_psa_crypto_code_parser.comprehensive_parse()
            finally:
                os.chdir("../")

        symbols = self.parse_symbols()
        mbedtls_parse_result = self._parse(all_macros, enum_consts,
                                           identifiers, excluded_identifiers,
                                           mbed_psa_words, symbols)
        # Combine results for Mbed TLS and TF-PSA-Crypto
        for key in tf_psa_crypto_parse_result:
            mbedtls_parse_result[key] += tf_psa_crypto_parse_result[key]
        return mbedtls_parse_result

    def parse_symbols(self):
        """
        Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509
        object files using nm to retrieve the list of referenced symbols.
        Exceptions thrown here are rethrown because they would be critical
        errors that void several tests, and thus needs to halt the program. This
        is explicitly done for clarity.

        Returns a List of unique symbols defined and used in the libraries.
        """
        self.log.info("Compiling...")
        symbols = []

        # Back up the config and atomically compile with the full configuration.
        shutil.copy(
            "include/mbedtls/mbedtls_config.h",
            "include/mbedtls/mbedtls_config.h.bak"
        )
        try:
            # Use check=True in all subprocess calls so that failures are raised
            # as exceptions and logged.
            subprocess.run(
                ["python3", "scripts/config.py", "full"],
                universal_newlines=True,
                check=True
            )
            my_environment = os.environ.copy()
            my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"
            # Run make clean separately to lib to prevent unwanted behavior when
            # make is invoked with parallelism.
            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
            subprocess.run(
                ["make", "lib"],
                env=my_environment,
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            )

            # Perform object file analysis using nm
            symbols = self.parse_symbols_from_nm([
                "library/libmbedcrypto.a",
                "library/libmbedtls.a",
                "library/libmbedx509.a"
            ])

            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
        except subprocess.CalledProcessError as error:
            self.log.debug(error.output)
            raise error
        finally:
            # Put back the original config regardless of there being errors.
            # Works also for keyboard interrupts.
            shutil.move(
                "include/mbedtls/mbedtls_config.h.bak",
                "include/mbedtls/mbedtls_config.h"
            )

        return symbols
993
class NameChecker():
    """
    Representation of the core name checking operation performed by this script.

    Consumes the dictionary produced by a code parser's comprehensive_parse()
    (keys: "symbols", "identifiers", "excluded_identifiers", "public_macros",
    "internal_macros", "private_macros", "enum_consts", "mbed_psa_words") and
    reports naming problems through the supplied logger.
    """
    def __init__(self, parse_result, log):
        self.parse_result = parse_result
        self.log = log

    def perform_checks(self, quiet=False):
        """
        A comprehensive checker that performs each check in order, and outputs
        a final verdict.

        Args:
        * quiet: whether to hide detailed problem explanation.

        Returns 0 if all checks pass, 1 otherwise.
        """
        self.log.info("=============")
        Problem.quiet = quiet
        problems = 0
        problems += self.check_symbols_declared_in_header()

        pattern_checks = [
            ("public_macros", PUBLIC_MACRO_PATTERN),
            ("internal_macros", INTERNAL_MACRO_PATTERN),
            ("enum_consts", CONSTANTS_PATTERN),
            ("identifiers", IDENTIFIER_PATTERN)
        ]
        for group, check_pattern in pattern_checks:
            problems += self.check_match_pattern(group, check_pattern)

        problems += self.check_for_typos()

        self.log.info("=============")
        if problems > 0:
            self.log.info("FAIL: {0} problem(s) to fix".format(problems))
            if quiet:
                self.log.info("Remove --quiet to see explanations.")
            else:
                self.log.info("Use --quiet for minimal output.")
            return 1
        else:
            self.log.info("PASS")
            return 0

    def check_symbols_declared_in_header(self):
        """
        Perform a check that all detected symbols in the library object files
        are properly declared in headers.
        Assumes parse_names_in_source() was called before this.

        Returns the number of problems that need fixing.
        """
        # Build the set of declared names once so each symbol lookup is O(1),
        # instead of rescanning the whole identifier list for every symbol.
        declared_names = {
            identifier_match.name
            for identifier_match in (self.parse_result["identifiers"] +
                                     self.parse_result["excluded_identifiers"])
        }

        problems = [
            SymbolNotInHeader(symbol)
            for symbol in self.parse_result["symbols"]
            if symbol not in declared_names
        ]

        self.output_check_result("All symbols in header", problems)
        return len(problems)

    def check_match_pattern(self, group_to_check, check_pattern):
        """
        Perform a check that all items of a group conform to a regex pattern.
        Assumes parse_names_in_source() was called before this.

        Args:
        * group_to_check: string key to index into self.parse_result.
        * check_pattern: the regex to check against.

        Returns the number of problems that need fixing.
        """
        problems = []

        for item_match in self.parse_result[group_to_check]:
            if not re.search(check_pattern, item_match.name):
                problems.append(PatternMismatch(check_pattern, item_match))
            # Double underscore should not be used for names (reserved for
            # the implementation in C).
            if "__" in item_match.name:
                problems.append(
                    PatternMismatch("no double underscore allowed", item_match))

        self.output_check_result(
            "Naming patterns of {}".format(group_to_check),
            problems)
        return len(problems)

    def check_for_typos(self):
        """
        Perform a check that all words in the source code beginning with MBED are
        either defined as macros, or as enum constants.
        Assumes parse_names_in_source() was called before this.

        Returns the number of problems that need fixing.
        """
        problems = []

        # Set comprehension, equivalent to a list comprehension wrapped by set()
        all_caps_names = {
            match.name
            for match
            in self.parse_result["public_macros"] +
            self.parse_result["internal_macros"] +
            self.parse_result["private_macros"] +
            self.parse_result["enum_consts"]
        }
        # NOTE(review): "MBEDTLS_TEST_LIBTESTDRIVER*" makes only the final
        # "R" optional/repeatable; "MBEDTLS_TEST_LIBTESTDRIVER.*" was
        # probably intended. Harmless in practice since re.search matches
        # the "...LIBTESTDRIVE" substring anywhere — confirm upstream
        # before changing.
        typo_exclusion = re.compile(r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$|"
                                    r"MBEDTLS_TEST_LIBTESTDRIVER*|"
                                    r"PSA_CRYPTO_DRIVER_TEST")

        for name_match in self.parse_result["mbed_psa_words"]:
            found = name_match.name in all_caps_names

            # Since MBEDTLS_PSA_ACCEL_XXX defines are defined by the
            # PSA driver, they will not exist as macros. However, they
            # should still be checked for typos using the equivalent
            # BUILTINs that exist.
            if "MBEDTLS_PSA_ACCEL_" in name_match.name:
                found = name_match.name.replace(
                    "MBEDTLS_PSA_ACCEL_",
                    "MBEDTLS_PSA_BUILTIN_") in all_caps_names

            if not found and not typo_exclusion.search(name_match.name):
                problems.append(Typo(name_match))

        self.output_check_result("Likely typos", problems)
        return len(problems)

    def output_check_result(self, name, problems):
        """
        Write out the PASS/FAIL status of a performed check depending on whether
        there were problems.

        Args:
        * name: the name of the test
        * problems: a List of encountered Problems
        """
        if problems:
            self.log.info("{}: FAIL\n".format(name))
            for problem in problems:
                self.log.warning(str(problem))
        else:
            self.log.info("{}: PASS".format(name))
1145
def main():
    """
    Entry point: parse command-line options, build the code parser matching
    the current source tree, and run the name checks on its results.

    Exits with the NameChecker verdict (0/1), or 2 on a script error.
    """
    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=(
            "This script confirms that the naming of all symbols and identifiers "
            "in Mbed TLS are consistent with the house style and are also "
            "self-consistent.\n\n"
            "Expected to be run from the Mbed TLS root directory.")
    )
    arg_parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="show parse results"
    )
    arg_parser.add_argument(
        "-q", "--quiet",
        action="store_true",
        help="hide unnecessary text, explanations, and highlights"
    )
    options = arg_parser.parse_args()

    # One shared root logger is handed to the parser and checker classes.
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG if options.verbose else logging.INFO)
    logger.addHandler(logging.StreamHandler())

    try:
        current_dir = os.getcwd()
        if build_tree.looks_like_tf_psa_crypto_root(current_dir):
            parse_result = TFPSACryptoCodeParser(logger).comprehensive_parse()
        elif build_tree.looks_like_mbedtls_root(current_dir):
            # Mbed TLS uses TF-PSA-Crypto, so we need to parse TF-PSA-Crypto too
            parse_result = MBEDTLSCodeParser(logger).comprehensive_parse()
        else:
            raise Exception("This script must be run from Mbed TLS or TF-PSA-Crypto root")
    except Exception: # pylint: disable=broad-except
        traceback.print_exc()
        sys.exit(2)

    name_checker = NameChecker(parse_result, logger)
    sys.exit(name_checker.perform_checks(quiet=options.quiet))
1195
# Run the checks only when executed as a script, not when imported.
if __name__ == "__main__":
    main()