Blame - tests/scripts/check_names.py - mirror/mbed-tls

blob: ab92c5d6f67bbcbd729546f07605f16ec0159636 [file] [log] [blame]

Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	1	#!/usr/bin/env python3
				2	#
				3	# Copyright The Mbed TLS Contributors
Dave Rodgman	7ff7965	2023-11-03 12:04:52 +0000	[diff] [blame]	4	# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	5
				6	"""
				7	This script confirms that the naming of all symbols and identifiers in Mbed TLS
				8	are consistent with the house style and are also self-consistent. It only runs
				9	on Linux and macOS since it depends on nm.
				10
				11	It contains two major Python classes, CodeParser and NameChecker. They both have
				12	a comprehensive "run-all" function (comprehensive_parse() and perform_checks())
				13	but the individual functions can also be used for specific needs.
				14
				15	CodeParser makes heavy use of regular expressions to parse the code, and is
				16	dependent on the current code formatting. Many Python C parser libraries require
				17	preprocessed C code, which means no macro parsing. Compiler tools are also not
				18	very helpful when we want the exact location in the original source (which
				19	becomes impossible when e.g. comments are stripped).
				20
				21	NameChecker performs the following checks:
				22
				23	- All exported and available symbols in the library object files, are explicitly
				24	declared in the header files. This uses the nm command.
				25	- All macros, constants, and identifiers (function names, struct names, etc)
				26	follow the required regex pattern.
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	27	- Typo checking: All words that begin with MBED|PSA exist as macros or constants.
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	28
				29	The script returns 0 on success, 1 on test failure, and 2 if there is a script
				30	error. It must be run from Mbed TLS root.
				31	"""
				32
				33	import abc
				34	import argparse
Gilles Peskine	7bf5205	2021-09-27 19:20:17 +0200	[diff] [blame]	35	import fnmatch
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	36	import glob
				37	import textwrap
				38	import os
				39	import sys
				40	import traceback
				41	import re
				42	import enum
				43	import shutil
				44	import subprocess
				45	import logging
				46
Gilles Peskine	7ff4766	2022-09-18 21:17:09 +0200	[diff] [blame]	47	import scripts_path # pylint: disable=unused-import
				48	from mbedtls_dev import build_tree
				49
				50
# Naming patterns to check against. These are defined outside the NameChecker
# class for ease of modification.
#
# Macros and enum constants: an MBEDTLS_ or PSA_ prefix followed by upper-case
# letters, digits and underscores, ending in a letter or a digit.
# The "|" inside the group must be unescaped: an escaped "\|" would be a
# literal pipe character and neither prefix could ever match.
MACRO_PATTERN = r"^(MBEDTLS|PSA)_[0-9A-Z_]*[0-9A-Z]$"
CONSTANTS_PATTERN = MACRO_PATTERN
# Identifiers: an mbedtls_ or psa_ prefix followed by lower-case letters,
# digits and underscores, ending in a letter or a digit.
IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$"
				56
				57	class Match(): # pylint: disable=too-few-public-methods
				58	"""
				59	A class representing a match, together with its found position.
				60
				61	Fields:
				62	* filename: the file that the match was in.
				63	* line: the full line containing the match.
				64	* line_no: the line number.
				65	* pos: a tuple of (start, end) positions on the line where the match is.
				66	* name: the match itself.
				67	"""
				68	def __init__(self, filename, line, line_no, pos, name):
				69	# pylint: disable=too-many-arguments
				70	self.filename = filename
				71	self.line = line
				72	self.line_no = line_no
				73	self.pos = pos
				74	self.name = name
				75
				76	def __str__(self):
				77	"""
				78	Return a formatted code listing representation of the erroneous line.
				79	"""
				80	gutter = format(self.line_no, "4d")
				81	underline = self.pos[0] * " " + (self.pos[1] - self.pos[0]) * "^"
				82
				83	return (
				84	" {0} \|\n".format(" " * len(gutter)) +
				85	" {0} \| {1}".format(gutter, self.line) +
				86	" {0} \| {1}\n".format(" " * len(gutter), underline)
				87	)
				88
				89	class Problem(abc.ABC): # pylint: disable=too-few-public-methods
				90	"""
				91	An abstract parent class representing a form of static analysis error.
				92	It extends an Abstract Base Class, which means it is not instantiable, and
				93	it also mandates certain abstract methods to be implemented in subclasses.
				94	"""
				95	# Class variable to control the quietness of all problems
				96	quiet = False
				97	def __init__(self):
				98	self.textwrapper = textwrap.TextWrapper()
				99	self.textwrapper.width = 80
				100	self.textwrapper.initial_indent = " > "
				101	self.textwrapper.subsequent_indent = " "
				102
				103	def __str__(self):
				104	"""
				105	Unified string representation method for all Problems.
				106	"""
				107	if self.__class__.quiet:
				108	return self.quiet_output()
				109	return self.verbose_output()
				110
				111	@abc.abstractmethod
				112	def quiet_output(self):
				113	"""
				114	The output when --quiet is enabled.
				115	"""
				116	pass
				117
				118	@abc.abstractmethod
				119	def verbose_output(self):
				120	"""
				121	The default output with explanation and code snippet if appropriate.
				122	"""
				123	pass
				124
				125	class SymbolNotInHeader(Problem): # pylint: disable=too-few-public-methods
				126	"""
				127	A problem that occurs when an exported/available symbol in the object file
				128	is not explicitly declared in header files. Created with
				129	NameCheck.check_symbols_declared_in_header()
				130
				131	Fields:
				132	* symbol_name: the name of the symbol.
				133	"""
				134	def __init__(self, symbol_name):
				135	self.symbol_name = symbol_name
				136	Problem.__init__(self)
				137
				138	def quiet_output(self):
				139	return "{0}".format(self.symbol_name)
				140
				141	def verbose_output(self):
				142	return self.textwrapper.fill(
				143	"'{0}' was found as an available symbol in the output of nm, "
				144	"however it was not declared in any header files."
				145	.format(self.symbol_name))
				146
				147	class PatternMismatch(Problem): # pylint: disable=too-few-public-methods
				148	"""
				149	A problem that occurs when something doesn't match the expected pattern.
				150	Created with NameCheck.check_match_pattern()
				151
				152	Fields:
				153	* pattern: the expected regex pattern
				154	* match: the Match object in question
				155	"""
				156	def __init__(self, pattern, match):
				157	self.pattern = pattern
				158	self.match = match
				159	Problem.__init__(self)
				160
				161
				162	def quiet_output(self):
				163	return (
				164	"{0}:{1}:{2}"
				165	.format(self.match.filename, self.match.line_no, self.match.name)
				166	)
				167
				168	def verbose_output(self):
				169	return self.textwrapper.fill(
				170	"{0}:{1}: '{2}' does not match the required pattern '{3}'."
				171	.format(
				172	self.match.filename,
				173	self.match.line_no,
				174	self.match.name,
				175	self.pattern
				176	)
				177	) + "\n" + str(self.match)
				178
				179	class Typo(Problem): # pylint: disable=too-few-public-methods
				180	"""
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	181	A problem that occurs when a word using MBED or PSA doesn't
				182	appear to be defined as constants nor enum values. Created with
				183	NameCheck.check_for_typos()
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	184
				185	Fields:
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	186	* match: the Match object of the MBED\|PSA name in question.
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	187	"""
				188	def __init__(self, match):
				189	self.match = match
				190	Problem.__init__(self)
				191
				192	def quiet_output(self):
				193	return (
				194	"{0}:{1}:{2}"
				195	.format(self.match.filename, self.match.line_no, self.match.name)
				196	)
				197
				198	def verbose_output(self):
				199	return self.textwrapper.fill(
				200	"{0}:{1}: '{2}' looks like a typo. It was not found in any "
				201	"macros or any enums. If this is not a typo, put "
				202	"//no-check-names after it."
				203	.format(self.match.filename, self.match.line_no, self.match.name)
				204	) + "\n" + str(self.match)
				205
				206	class CodeParser():
				207	"""
				208	Class for retrieving files and parsing the code. This can be used
				209	independently of the checks that NameChecker performs, for example for
				210	list_internal_identifiers.py.
				211	"""
    def __init__(self, log):
        """
        Set up the parser.

        Args:
        * log: the logger object used for info and debug messages.
        """
        self.log = log
        # NOTE(review): presumably validates that the script is run from the
        # expected directory -- confirm against mbedtls_dev.build_tree.
        build_tree.check_repo_path()

        # Memo for storing "glob expression": set(filepaths)
        self.files = {}

        # Globally excluded filenames.
        # Note that "*" can match directory separators in exclude lists.
        # The patterns are matched with fnmatch against the whole relative
        # path returned by glob, so each needs a leading "*" to match a file
        # in any directory; without it nothing would ever be excluded.
        # NOTE(review): "*/bn_mul" only matches a path whose last component is
        # exactly "bn_mul" -- confirm whether "bn_mul.h" was intended.
        self.excluded_files = ["*/bn_mul", "*/compat-1.3.h"]
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	222
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	223	def comprehensive_parse(self):
				224	"""
				225	Comprehensive ("default") function to call each parsing function and
				226	retrieve various elements of the code, together with the source location.
				227
				228	Returns a dict of parsed item key to the corresponding List of Matches.
				229	"""
				230	self.log.info("Parsing source code...")
				231	self.log.debug(
				232	"The following files are excluded from the search: {}"
				233	.format(str(self.excluded_files))
				234	)
				235
				236	all_macros = self.parse_macros([
				237	"include/mbedtls/*.h",
				238	"include/psa/*.h",
				239	"library/*.h",
				240	"tests/include/test/drivers/*.h",
				241	"3rdparty/everest/include/everest/everest.h",
				242	"3rdparty/everest/include/everest/x25519.h"
				243	])
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	244	private_macros = self.parse_macros([
				245	"library/*.c",
				246	])
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	247	enum_consts = self.parse_enum_consts([
				248	"include/mbedtls/*.h",
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	249	"include/psa/*.h",
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	250	"library/*.h",
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	251	"library/*.c",
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	252	"3rdparty/everest/include/everest/everest.h",
				253	"3rdparty/everest/include/everest/x25519.h"
				254	])
Aditya Deshpande	94375c8	2023-01-25 17:00:12 +0000	[diff] [blame]	255	identifiers, excluded_identifiers = self.parse_identifiers([
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	256	"include/mbedtls/*.h",
				257	"include/psa/*.h",
				258	"library/*.h",
				259	"3rdparty/everest/include/everest/everest.h",
				260	"3rdparty/everest/include/everest/x25519.h"
				261	])
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	262	mbed_psa_words = self.parse_mbed_psa_words([
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	263	"include/mbedtls/*.h",
				264	"include/psa/*.h",
				265	"library/*.h",
				266	"3rdparty/everest/include/everest/everest.h",
				267	"3rdparty/everest/include/everest/x25519.h",
				268	"library/*.c",
				269	"3rdparty/everest/library/everest.c",
				270	"3rdparty/everest/library/x25519.c"
				271	])
				272	symbols = self.parse_symbols()
				273
				274	# Remove identifier macros like mbedtls_printf or mbedtls_calloc
				275	identifiers_justname = [x.name for x in identifiers]
				276	actual_macros = []
				277	for macro in all_macros:
				278	if macro.name not in identifiers_justname:
				279	actual_macros.append(macro)
				280
				281	self.log.debug("Found:")
				282	# Aligns the counts on the assumption that none exceeds 4 digits
				283	self.log.debug(" {:4} Total Macros".format(len(all_macros)))
				284	self.log.debug(" {:4} Non-identifier Macros".format(len(actual_macros)))
				285	self.log.debug(" {:4} Enum Constants".format(len(enum_consts)))
				286	self.log.debug(" {:4} Identifiers".format(len(identifiers)))
				287	self.log.debug(" {:4} Exported Symbols".format(len(symbols)))
				288	return {
				289	"macros": actual_macros,
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	290	"private_macros": private_macros,
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	291	"enum_consts": enum_consts,
				292	"identifiers": identifiers,
Aditya Deshpande	94375c8	2023-01-25 17:00:12 +0000	[diff] [blame]	293	"excluded_identifiers": excluded_identifiers,
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	294	"symbols": symbols,
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	295	"mbed_psa_words": mbed_psa_words
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	296	}
				297
Gilles Peskine	7bf5205	2021-09-27 19:20:17 +0200	[diff] [blame]	298	def is_file_excluded(self, path, exclude_wildcards):
Gilles Peskine	1c39975	2021-09-28 10:12:49 +0200	[diff] [blame]	299	"""Whether the given file path is excluded."""
Gilles Peskine	7bf5205	2021-09-27 19:20:17 +0200	[diff] [blame]	300	# exclude_wildcards may be None. Also, consider the global exclusions.
				301	exclude_wildcards = (exclude_wildcards or []) + self.excluded_files
				302	for pattern in exclude_wildcards:
				303	if fnmatch.fnmatch(path, pattern):
				304	return True
				305	return False
				306
Aditya Deshpande	94375c8	2023-01-25 17:00:12 +0000	[diff] [blame]	307	def get_all_files(self, include_wildcards, exclude_wildcards):
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	308	"""
Aditya Deshpande	94375c8	2023-01-25 17:00:12 +0000	[diff] [blame]	309	Get all files that match any of the included UNIX-style wildcards
				310	and filter them into included and excluded lists.
				311	While the check_names script is designed only for use on UNIX/macOS
				312	(due to nm), this function alone will work fine on Windows even with
				313	forward slashes in the wildcard.
Aditya Deshpande	7d20bb4	2023-01-27 15:45:32 +0000	[diff] [blame]	314
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	315	Args:
				316	* include_wildcards: a List of shell-style wildcards to match filepaths.
				317	* exclude_wildcards: a List of shell-style wildcards to exclude.
Aditya Deshpande	7d20bb4	2023-01-27 15:45:32 +0000	[diff] [blame]	318
Aditya Deshpande	94375c8	2023-01-25 17:00:12 +0000	[diff] [blame]	319	Returns:
				320	* inc_files: A List of relative filepaths for included files.
				321	* exc_files: A List of relative filepaths for excluded files.
				322	"""
				323	accumulator = set()
				324	all_wildcards = include_wildcards + (exclude_wildcards or [])
				325	for wildcard in all_wildcards:
				326	accumulator = accumulator.union(glob.iglob(wildcard))
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	327
Aditya Deshpande	94375c8	2023-01-25 17:00:12 +0000	[diff] [blame]	328	inc_files = []
				329	exc_files = []
				330	for path in accumulator:
				331	if self.is_file_excluded(path, exclude_wildcards):
				332	exc_files.append(path)
				333	else:
				334	inc_files.append(path)
				335	return (inc_files, exc_files)
				336
				337	def get_included_files(self, include_wildcards, exclude_wildcards):
				338	"""
				339	Get all files that match any of the included UNIX-style wildcards.
				340	While the check_names script is designed only for use on UNIX/macOS
				341	(due to nm), this function alone will work fine on Windows even with
				342	forward slashes in the wildcard.
Aditya Deshpande	7d20bb4	2023-01-27 15:45:32 +0000	[diff] [blame]	343
Aditya Deshpande	94375c8	2023-01-25 17:00:12 +0000	[diff] [blame]	344	Args:
				345	* include_wildcards: a List of shell-style wildcards to match filepaths.
				346	* exclude_wildcards: a List of shell-style wildcards to exclude.
Aditya Deshpande	7d20bb4	2023-01-27 15:45:32 +0000	[diff] [blame]	347
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	348	Returns a List of relative filepaths.
				349	"""
				350	accumulator = set()
				351
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	352	for include_wildcard in include_wildcards:
Gilles Peskine	7bf5205	2021-09-27 19:20:17 +0200	[diff] [blame]	353	accumulator = accumulator.union(glob.iglob(include_wildcard))
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	354
Gilles Peskine	7bf5205	2021-09-27 19:20:17 +0200	[diff] [blame]	355	return list(path for path in accumulator
				356	if not self.is_file_excluded(path, exclude_wildcards))
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	357
    def parse_macros(self, include, exclude=None):
        """
        Parse all macros defined by #define preprocessor directives.

        Macros whose name starts with one of a few fixed prefixes ("asm",
        "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_") are skipped.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the found macros.
        """
        macro_regex = re.compile(r"# *define +(?P<macro>\w+)")
        exclusions = (
            "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
        )

        files = self.get_included_files(include, exclude)
        self.log.debug("Looking for macros in {} files".format(len(files)))

        macros = []
        for header_file in files:
            with open(header_file, "r", encoding="utf-8") as header:
                # Number lines from 1 so that the reported "file:line"
                # location is the one that editors and compilers use.
                for line_no, line in enumerate(header, 1):
                    for macro in macro_regex.finditer(line):
                        name = macro.group("macro")
                        if name.startswith(exclusions):
                            continue

                        macros.append(Match(
                            header_file,
                            line,
                            line_no,
                            macro.span("macro"),
                            name))

        return macros
				392
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	393	def parse_mbed_psa_words(self, include, exclude=None):
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	394	"""
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	395	Parse all words in the file that begin with MBED\|PSA, in and out of
				396	macros, comments, anything.
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	397
				398	Args:
				399	* include: A List of glob expressions to look for files through.
				400	* exclude: A List of glob expressions for excluding files.
				401
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	402	Returns a List of Match objects for words beginning with MBED\|PSA.
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	403	"""
				404	# Typos of TLS are common, hence the broader check below than MBEDTLS.
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	405	mbed_regex = re.compile(r"\b(MBED.+?\|PSA)_[A-Z0-9_]*")
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	406	exclusions = re.compile(r"// *no-check-names\|#error")
				407
Aditya Deshpande	94375c8	2023-01-25 17:00:12 +0000	[diff] [blame]	408	files = self.get_included_files(include, exclude)
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	409	self.log.debug(
				410	"Looking for MBED\|PSA words in {} files"
				411	.format(len(files))
				412	)
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	413
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	414	mbed_psa_words = []
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	415	for filename in files:
				416	with open(filename, "r", encoding="utf-8") as fp:
				417	for line_no, line in enumerate(fp):
				418	if exclusions.search(line):
				419	continue
				420
				421	for name in mbed_regex.finditer(line):
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	422	mbed_psa_words.append(Match(
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	423	filename,
				424	line,
				425	line_no,
				426	name.span(0),
				427	name.group(0)))
				428
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	429	return mbed_psa_words
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	430
    def parse_enum_consts(self, include, exclude=None):
        """
        Parse all enum value constants that are declared.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the findings.
        """
        files = self.get_included_files(include, exclude)
        self.log.debug("Looking for enum consts in {} files".format(len(files)))

        # Match typedefs and brackets only when they are at the
        # beginning of the line -- if they are indented, they might
        # be sub-structures within structs, etc.
        # The patterns do not depend on the line, so compile them once here.
        optional_c_identifier = r"([_a-zA-Z][_a-zA-Z0-9]*)?"
        enum_with_brace = re.compile(
            r"^(typedef +)?enum " + optional_c_identifier + r" *{")
        enum_keyword = re.compile(r"^(typedef +)?enum")
        opening_brace = re.compile(r"^{")
        closing_brace = re.compile(r"^}")
        preprocessor_line = re.compile(r"^ *#")
        first_word = re.compile(r"^ *(?P<enum_const>\w+)")

        # Emulate a finite state machine to parse enum declarations.
        # OUTSIDE_KEYWORD = outside the enum keyword
        # IN_BRACES = inside enum opening braces
        # IN_BETWEEN = between enum keyword and opening braces
        states = enum.Enum("FSM", ["OUTSIDE_KEYWORD", "IN_BRACES", "IN_BETWEEN"])
        enum_consts = []
        for header_file in files:
            state = states.OUTSIDE_KEYWORD
            with open(header_file, "r", encoding="utf-8") as header:
                # Number lines from 1 so that the reported "file:line"
                # location is the one that editors and compilers use.
                for line_no, line in enumerate(header, 1):
                    if state == states.OUTSIDE_KEYWORD:
                        if enum_with_brace.search(line):
                            state = states.IN_BRACES
                        elif enum_keyword.search(line):
                            state = states.IN_BETWEEN
                    elif state == states.IN_BETWEEN:
                        if opening_brace.search(line):
                            state = states.IN_BRACES
                    elif closing_brace.search(line):
                        # Only reachable in the IN_BRACES state.
                        state = states.OUTSIDE_KEYWORD
                    elif not preprocessor_line.search(line):
                        # Inside the braces: the first word of the line, if
                        # there is one, is taken as the enum constant.
                        enum_const = first_word.search(line)
                        if not enum_const:
                            continue

                        enum_consts.append(Match(
                            header_file,
                            line,
                            line_no,
                            enum_const.span("enum_const"),
                            enum_const.group("enum_const")))

        return enum_consts
				486
Gilles Peskine	4480162	2021-11-17 20:43:35 +0100	[diff] [blame]	487	IGNORED_CHUNK_REGEX = re.compile('\|'.join([
				488	r'/\.?\*/', # block comment entirely on one line
				489	r'//.*', # line comment
				490	r'(?P<string>")(?:[^\\\"]\|\\.)*"', # string literal
				491	]))
				492
Gilles Peskine	df30665	2021-11-17 20:32:31 +0100	[diff] [blame]	493	def strip_comments_and_literals(self, line, in_block_comment):
				494	"""Strip comments and string literals from line.
				495
				496	Continuation lines are not supported.
				497
				498	If in_block_comment is true, assume that the line starts inside a
				499	block comment.
				500
				501	Return updated values of (line, in_block_comment) where:
				502	* Comments in line have been replaced by a space (or nothing at the
				503	start or end of the line).
				504	* String contents have been removed.
				505	* in_block_comment indicates whether the line ends inside a block
				506	comment that continues on the next line.
				507	"""
Gilles Peskine	23b4096	2021-11-17 20:45:39 +0100	[diff] [blame]	508
				509	# Terminate current multiline comment?
Gilles Peskine	df30665	2021-11-17 20:32:31 +0100	[diff] [blame]	510	if in_block_comment:
Gilles Peskine	23b4096	2021-11-17 20:45:39 +0100	[diff] [blame]	511	m = re.search(r"\*/", line)
				512	if m:
				513	in_block_comment = False
				514	line = line[m.end(0):]
				515	else:
				516	return '', True
Gilles Peskine	4480162	2021-11-17 20:43:35 +0100	[diff] [blame]	517
				518	# Remove full comments and string literals.
				519	# Do it all together to handle cases like "/*" correctly.
				520	# Note that continuation lines are not supported.
				521	line = re.sub(self.IGNORED_CHUNK_REGEX,
				522	lambda s: '""' if s.group('string') else ' ',
Gilles Peskine	df30665	2021-11-17 20:32:31 +0100	[diff] [blame]	523	line)
Gilles Peskine	4480162	2021-11-17 20:43:35 +0100	[diff] [blame]	524
Gilles Peskine	df30665	2021-11-17 20:32:31 +0100	[diff] [blame]	525	# Start an unfinished comment?
Gilles Peskine	4480162	2021-11-17 20:43:35 +0100	[diff] [blame]	526	# (If `/*` was part of a complete comment, it's already been removed.)
Gilles Peskine	23b4096	2021-11-17 20:45:39 +0100	[diff] [blame]	527	m = re.search(r"/\*", line)
Gilles Peskine	df30665	2021-11-17 20:32:31 +0100	[diff] [blame]	528	if m:
				529	in_block_comment = True
Gilles Peskine	23b4096	2021-11-17 20:45:39 +0100	[diff] [blame]	530	line = line[:m.start(0)]
Gilles Peskine	4480162	2021-11-17 20:43:35 +0100	[diff] [blame]	531
Gilles Peskine	df30665	2021-11-17 20:32:31 +0100	[diff] [blame]	532	return line, in_block_comment
				533
Gilles Peskine	c8fc67f	2021-11-17 20:23:18 +0100	[diff] [blame]	534	IDENTIFIER_REGEX = re.compile('\|'.join([
Gilles Peskine	b3f4dd5	2021-11-16 20:56:47 +0100	[diff] [blame]	535	# Match " something(a" or " *something(a". Functions.
				536	# Assumptions:
				537	# - function definition from return type to one of its arguments is
				538	# all on one line
				539	# - function definition line only contains alphanumeric, asterisk,
				540	# underscore, and open bracket
Gilles Peskine	c8fc67f	2021-11-17 20:23:18 +0100	[diff] [blame]	541	r".* \*(\w+) \( *\w",
Gilles Peskine	b3f4dd5	2021-11-16 20:56:47 +0100	[diff] [blame]	542	# Match "(*something)(".
Gilles Peskine	c8fc67f	2021-11-17 20:23:18 +0100	[diff] [blame]	543	r".$ \* (\w+) $ *\(",
Gilles Peskine	b3f4dd5	2021-11-16 20:56:47 +0100	[diff] [blame]	544	# Match names of named data structures.
Gilles Peskine	c8fc67f	2021-11-17 20:23:18 +0100	[diff] [blame]	545	r"(?:typedef +)?(?:struct\|union\|enum) +(\w+)(?: *{)?$",
Gilles Peskine	b3f4dd5	2021-11-16 20:56:47 +0100	[diff] [blame]	546	# Match names of typedef instances, after closing bracket.
Gilles Peskine	c8fc67f	2021-11-17 20:23:18 +0100	[diff] [blame]	547	r"}? (\w+)[;[].",
				548	]))
Gilles Peskine	b3f4dd5	2021-11-16 20:56:47 +0100	[diff] [blame]	549	# The regex below is indented for clarity.
Gilles Peskine	c8fc67f	2021-11-17 20:23:18 +0100	[diff] [blame]	550	EXCLUSION_LINES = re.compile("\|".join([
				551	r"extern +\"C\"",
				552	r"(typedef +)?(struct\|union\|enum)( *{)?$",
				553	r"} *;?$",
				554	r"$",
				555	r"//",
				556	r"#",
				557	]))
Gilles Peskine	b3f4dd5	2021-11-16 20:56:47 +0100	[diff] [blame]	558
    def parse_identifiers_in_file(self, header_file, identifiers):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style.

        Append found matches to the list ``identifiers``.

        Args:
        * header_file: path of the file to read (opened as UTF-8).
        * identifiers: the List that found Match objects are appended to.
        """

        with open(header_file, "r", encoding="utf-8") as header:
            in_block_comment = False
            # The previous line variable is used for concatenating lines
            # when identifiers are formatted and spread across multiple
            # lines.
            previous_line = ""

            # Number lines from 1 so that the reported "file:line" location
            # is the one that editors and compilers use. For a declaration
            # spread over several lines, this is the number of its last line.
            for line_no, line in enumerate(header, 1):
                line, in_block_comment = \
                    self.strip_comments_and_literals(line, in_block_comment)

                if self.EXCLUSION_LINES.match(line):
                    previous_line = ""
                    continue

                # If the line contains only space-separated alphanumeric
                # characters (or underscore, asterisk, or open parenthesis),
                # and nothing else, high chance it's a declaration that
                # continues on the next line
                if re.search(r"^([\w\*\(]+\s+)+$", line):
                    previous_line += line
                    continue

                # If previous line seemed to start an unfinished declaration
                # (as above), concat and treat them as one.
                if previous_line:
                    line = previous_line.strip() + " " + line.strip() + "\n"
                    previous_line = ""

                # Skip parsing if line has a space in front = heuristic to
                # skip function argument lines (highly subject to formatting
                # changes)
                if line[0] == " ":
                    continue

                identifier = self.IDENTIFIER_REGEX.search(line)

                if not identifier:
                    continue

                # Find the group that matched, and append it
                for group in identifier.groups():
                    if not group:
                        continue

                    identifiers.append(Match(
                        header_file,
                        line,
                        line_no,
                        identifier.span(),
                        group))
				619
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	620	def parse_identifiers(self, include, exclude=None):
				621	"""
				622	Parse all lines of a header where a function/enum/struct/union/typedef
				623	identifier is declared, based on some regex and heuristics. Highly
Aditya Deshpande	94375c8	2023-01-25 17:00:12 +0000	[diff] [blame]	624	dependent on formatting style. Identifiers in excluded files are still
				625	parsed
Aditya Deshpande	7d20bb4	2023-01-27 15:45:32 +0000	[diff] [blame]	626
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	627	Args:
				628	* include: A List of glob expressions to look for files through.
				629	* exclude: A List of glob expressions for excluding files.
Aditya Deshpande	7d20bb4	2023-01-27 15:45:32 +0000	[diff] [blame]	630
Aditya Deshpande	94375c8	2023-01-25 17:00:12 +0000	[diff] [blame]	631	Returns: a Tuple of two Lists of Match objects with identifiers.
				632	* included_identifiers: A List of Match objects with identifiers from
				633	included files.
				634	* excluded_identifiers: A List of Match objects with identifiers from
				635	excluded files.
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	636	"""
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	637
Aditya Deshpande	94375c8	2023-01-25 17:00:12 +0000	[diff] [blame]	638	included_files, excluded_files = \
				639	self.get_all_files(include, exclude)
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	640
Aditya Deshpande	94375c8	2023-01-25 17:00:12 +0000	[diff] [blame]	641	self.log.debug("Looking for included identifiers in {} files".format \
				642	(len(included_files)))
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	643
Aditya Deshpande	94375c8	2023-01-25 17:00:12 +0000	[diff] [blame]	644	included_identifiers = []
				645	excluded_identifiers = []
				646	for header_file in included_files:
				647	self.parse_identifiers_in_file(header_file, included_identifiers)
				648	for header_file in excluded_files:
				649	self.parse_identifiers_in_file(header_file, excluded_identifiers)
				650
				651	return (included_identifiers, excluded_identifiers)
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	652
    def parse_symbols(self):
        """
        Build the Mbed TLS libraries with the full configuration and collect
        the symbols that nm reports for the crypto, TLS and x509 archives.

        The current config.h is backed up before the build and moved back in
        a finally clause, so it is restored whether or not the build succeeds.
        If a subprocess fails, its captured output is logged at debug level
        and the CalledProcessError is re-raised to the caller, because such a
        failure voids the checks that depend on the symbol list.

        Returns the List of symbols produced by parse_symbols_from_nm().
        """
        config_header = "include/mbedtls/config.h"
        config_backup = "include/mbedtls/config.h.bak"
        # check=True on every call: a non-zero exit status becomes an
        # exception instead of being silently ignored.
        checked_run = {"universal_newlines": True, "check": True}

        self.log.info("Compiling...")

        # Keep a copy of the current config before switching to "full".
        shutil.copy(config_header, config_backup)
        try:
            subprocess.run(
                ["python3", "scripts/config.py", "full"],
                **checked_run
            )

            build_env = os.environ.copy()
            build_env["CFLAGS"] = "-fno-asynchronous-unwind-tables"

            # "clean" gets its own make invocation rather than sharing one
            # with "lib", to prevent unwanted behavior when make is invoked
            # with parallelism.
            subprocess.run(["make", "clean"], **checked_run)
            subprocess.run(
                ["make", "lib"],
                env=build_env,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                **checked_run
            )

            # Object file analysis using nm.
            found_symbols = self.parse_symbols_from_nm([
                "library/libmbedcrypto.a",
                "library/libmbedtls.a",
                "library/libmbedx509.a"
            ])

            subprocess.run(["make", "clean"], **checked_run)
        except subprocess.CalledProcessError as build_error:
            self.log.debug(build_error.output)
            raise build_error
        finally:
            # Runs on success, on errors and on keyboard interrupts alike.
            shutil.move(config_backup, config_header)

        return found_symbols
				721
    def parse_symbols_from_nm(self, object_files):
        """
        Run nm on each object file and extract the symbol names it lists.
        Does not return the position data since it is of no use.

        Lines describing undefined symbols, empty lines and bare "file:"
        header lines are skipped. Any other line that cannot be parsed as a
        symbol entry, or whose symbol starts with one of the excluded
        prefixes, is logged as an error.

        Args:
        * object_files: a List of compiled object filepaths to search through.

        Returns a List of the symbol names found, in nm output order. The
        list is not de-duplicated.

        Raises subprocess.CalledProcessError if nm exits with a non-zero
        status.
        """
        # Three alternatives: an undefined ("U") entry, an empty line, or a
        # line that only names a file. The separators must be plain "|";
        # an escaped "\|" would match a literal pipe character instead.
        nm_undefined_regex = re.compile(r"^\S+: +U |^$|^\S+:$")
        # "<file>: [<hex address>] <type letter> <symbol>", with any leading
        # underscores stripped from the captured symbol name.
        nm_valid_regex = re.compile(r"^\S+( [0-9A-Fa-f]+)* . _*(?P<symbol>\w+)")
        exclusions = ("FStar", "Hacl")

        # Gather all outputs of nm into one string.
        nm_output = "".join(
            subprocess.run(
                ["nm", "-og", lib],
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            ).stdout
            for lib in object_files
        )

        symbols = []
        for line in nm_output.splitlines():
            if nm_undefined_regex.search(line):
                continue
            symbol = nm_valid_regex.search(line)
            if symbol and not symbol.group("symbol").startswith(exclusions):
                symbols.append(symbol.group("symbol"))
            else:
                self.log.error(line)

        return symbols
				759
				760	class NameChecker():
				761	"""
				762	Representation of the core name checking operation performed by this script.
				763	"""
				764	def __init__(self, parse_result, log):
				765	self.parse_result = parse_result
				766	self.log = log
				767
				768	def perform_checks(self, quiet=False):
				769	"""
				770	A comprehensive checker that performs each check in order, and outputs
				771	a final verdict.
				772
				773	Args:
				774	* quiet: whether to hide detailed problem explanation.
				775	"""
				776	self.log.info("=============")
				777	Problem.quiet = quiet
				778	problems = 0
				779	problems += self.check_symbols_declared_in_header()
				780
				781	pattern_checks = [
				782	("macros", MACRO_PATTERN),
				783	("enum_consts", CONSTANTS_PATTERN),
				784	("identifiers", IDENTIFIER_PATTERN)
				785	]
				786	for group, check_pattern in pattern_checks:
				787	problems += self.check_match_pattern(group, check_pattern)
				788
				789	problems += self.check_for_typos()
				790
				791	self.log.info("=============")
				792	if problems > 0:
				793	self.log.info("FAIL: {0} problem(s) to fix".format(str(problems)))
				794	if quiet:
				795	self.log.info("Remove --quiet to see explanations.")
				796	else:
				797	self.log.info("Use --quiet for minimal output.")
				798	return 1
				799	else:
				800	self.log.info("PASS")
				801	return 0
				802
				803	def check_symbols_declared_in_header(self):
				804	"""
				805	Perform a check that all detected symbols in the library object files
				806	are properly declared in headers.
				807	Assumes parse_names_in_source() was called before this.
Aditya Deshpande	7d20bb4	2023-01-27 15:45:32 +0000	[diff] [blame]	808
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	809	Returns the number of problems that need fixing.
				810	"""
				811	problems = []
Aditya Deshpande	94375c8	2023-01-25 17:00:12 +0000	[diff] [blame]	812	all_identifiers = self.parse_result["identifiers"] + \
				813	self.parse_result["excluded_identifiers"]
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	814
				815	for symbol in self.parse_result["symbols"]:
				816	found_symbol_declared = False
Aditya Deshpande	94375c8	2023-01-25 17:00:12 +0000	[diff] [blame]	817	for identifier_match in all_identifiers:
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	818	if symbol == identifier_match.name:
				819	found_symbol_declared = True
				820	break
				821
				822	if not found_symbol_declared:
				823	problems.append(SymbolNotInHeader(symbol))
				824
				825	self.output_check_result("All symbols in header", problems)
				826	return len(problems)
				827
				828	def check_match_pattern(self, group_to_check, check_pattern):
				829	"""
				830	Perform a check that all items of a group conform to a regex pattern.
				831	Assumes parse_names_in_source() was called before this.
				832
				833	Args:
				834	* group_to_check: string key to index into self.parse_result.
				835	* check_pattern: the regex to check against.
				836
				837	Returns the number of problems that need fixing.
				838	"""
				839	problems = []
				840
				841	for item_match in self.parse_result[group_to_check]:
				842	if not re.search(check_pattern, item_match.name):
				843	problems.append(PatternMismatch(check_pattern, item_match))
				844	# Double underscore should not be used for names
				845	if re.search(r".__.", item_match.name):
				846	problems.append(
				847	PatternMismatch("no double underscore allowed", item_match))
				848
				849	self.output_check_result(
				850	"Naming patterns of {}".format(group_to_check),
				851	problems)
				852	return len(problems)
				853
				854	def check_for_typos(self):
				855	"""
Shaun Case	0e7791f	2021-12-20 21:14:10 -0800	[diff] [blame]	856	Perform a check that all words in the source code beginning with MBED are
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	857	either defined as macros, or as enum constants.
				858	Assumes parse_names_in_source() was called before this.
				859
				860	Returns the number of problems that need fixing.
				861	"""
				862	problems = []
				863
				864	# Set comprehension, equivalent to a list comprehension wrapped by set()
				865	all_caps_names = {
				866	match.name
				867	for match
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	868	in self.parse_result["macros"] +
				869	self.parse_result["private_macros"] +
				870	self.parse_result["enum_consts"]
				871	}
Ronald Cron	b814bda	2021-09-13 14:50:42 +0200	[diff] [blame]	872	typo_exclusion = re.compile(r"XXX\|__\|_$\|^MBEDTLS_.*CONFIG_FILE$\|"
Pengyu Lv	fda7f50	2022-11-08 16:56:51 +0800	[diff] [blame]	873	r"MBEDTLS_TEST_LIBTESTDRIVER*\|"
				874	r"PSA_CRYPTO_DRIVER_TEST")
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	875
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	876	for name_match in self.parse_result["mbed_psa_words"]:
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	877	found = name_match.name in all_caps_names
				878
				879	# Since MBEDTLS_PSA_ACCEL_XXX defines are defined by the
				880	# PSA driver, they will not exist as macros. However, they
				881	# should still be checked for typos using the equivalent
				882	# BUILTINs that exist.
				883	if "MBEDTLS_PSA_ACCEL_" in name_match.name:
				884	found = name_match.name.replace(
				885	"MBEDTLS_PSA_ACCEL_",
				886	"MBEDTLS_PSA_BUILTIN_") in all_caps_names
				887
				888	if not found and not typo_exclusion.search(name_match.name):
				889	problems.append(Typo(name_match))
				890
				891	self.output_check_result("Likely typos", problems)
				892	return len(problems)
				893
				894	def output_check_result(self, name, problems):
				895	"""
				896	Write out the PASS/FAIL status of a performed check depending on whether
				897	there were problems.
				898
				899	Args:
				900	* name: the name of the test
				901	* problems: a List of encountered Problems
				902	"""
				903	if problems:
				904	self.log.info("{}: FAIL\n".format(name))
				905	for problem in problems:
				906	self.log.warning(str(problem))
				907	else:
				908	self.log.info("{}: PASS".format(name))
				909
				910	def main():
				911	"""
				912	Perform argument parsing, and create an instance of CodeParser and
				913	NameChecker to begin the core operation.
				914	"""
				915	parser = argparse.ArgumentParser(
				916	formatter_class=argparse.RawDescriptionHelpFormatter,
				917	description=(
				918	"This script confirms that the naming of all symbols and identifiers "
				919	"in Mbed TLS are consistent with the house style and are also "
				920	"self-consistent.\n\n"
Thomas Daubney	0814a22	2023-10-06 17:37:01 +0100	[diff] [blame]	921	"Expected to be run from the Mbed TLS root directory.")
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	922	)
				923	parser.add_argument(
				924	"-v", "--verbose",
				925	action="store_true",
				926	help="show parse results"
				927	)
				928	parser.add_argument(
				929	"-q", "--quiet",
				930	action="store_true",
Tom Cosgrove	49f99bc	2022-12-04 16:44:21 +0000	[diff] [blame]	931	help="hide unnecessary text, explanations, and highlights"
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	932	)
				933
				934	args = parser.parse_args()
				935
				936	# Configure the global logger, which is then passed to the classes below
				937	log = logging.getLogger()
				938	log.setLevel(logging.DEBUG if args.verbose else logging.INFO)
				939	log.addHandler(logging.StreamHandler())
				940
				941	try:
				942	code_parser = CodeParser(log)
				943	parse_result = code_parser.comprehensive_parse()
				944	except Exception: # pylint: disable=broad-except
				945	traceback.print_exc()
				946	sys.exit(2)
				947
				948	name_checker = NameChecker(parse_result, log)
				949	return_code = name_checker.perform_checks(quiet=args.quiet)
				950
				951	sys.exit(return_code)
				952
# Run the checks only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()