blob: 21c08eda4805b7710fe12934be1601ab282cec05 [file] [log] [blame]
"""Collect macro definitions from header files.
"""
3
# Copyright The Mbed TLS Contributors
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
18
import itertools
import re
from typing import Dict, IO, Iterable, Iterator, List, Optional, Pattern, Set, Tuple, Union
Gilles Peskine3cf3a8e2021-03-30 19:09:05 +020022
23
class ReadFileLineException(Exception):
    """Exception that records where in a file an error occurred.

    The exception message has the form ``in FILENAME at LINE_NUMBER``.

    Attributes:
    * filename: the file name passed to the constructor.
    * line_number: the position passed to the constructor, either an
      integer or a descriptive string.
    """

    def __init__(self, filename: str, line_number: Union[int, str]) -> None:
        message = 'in {} at {}'.format(filename, line_number)
        super().__init__(message)
        self.filename = filename
        self.line_number = line_number
30
31
class read_file_lines:
    # Dear Pylint, conventionally, a context manager class name is lowercase.
    # pylint: disable=invalid-name,too-few-public-methods
    """Context manager to read a file line by line.

    ```
    with read_file_lines(filename) as lines:
        for line in lines:
            process(line)
    ```
    is equivalent to
    ```
    with open(filename, 'r') as input_file:
        for line in input_file:
            process(line)
    ```
    except that if process(line) raises an exception, then the read_file_lines
    snippet annotates the exception with the file name and line number.

    If `binary` is true, the file is opened in mode 'rb' instead of 'r'.

    Only exceptions derived from `Exception` are annotated. Other exceptions
    (such as `KeyboardInterrupt` or `SystemExit`) propagate unchanged.
    """

    def __init__(self, filename: str, binary: bool = False) -> None:
        self.filename = filename
        self.file = None #type: Optional[IO[str]]
        # Position reported when an exception is annotated: 'entry' until
        # the first line has been read, then the 1-based number of the line
        # being processed, then 'exit' once all lines have been consumed.
        self.line_number = 'entry' #type: Union[int, str]
        self.generator = None #type: Optional[Iterable[Tuple[int, str]]]
        self.binary = binary

    def __enter__(self) -> 'read_file_lines':
        self.file = open(self.filename, 'rb' if self.binary else 'r')
        # Count lines from 1 so that the reported position matches what
        # editors and compilers call the line number.
        self.generator = enumerate(self.file, 1)
        return self

    def __iter__(self) -> Iterator[str]:
        assert self.generator is not None
        for line_number, content in self.generator:
            self.line_number = line_number
            yield content
        self.line_number = 'exit'

    def __exit__(self, exc_type, exc_value, exc_traceback) -> None:
        if self.file is not None:
            self.file.close()
        # Annotate ordinary errors only. Wrapping KeyboardInterrupt,
        # SystemExit or GeneratorExit would turn a request to stop into
        # an error.
        if exc_type is not None and issubclass(exc_type, Exception):
            raise ReadFileLineException(self.filename, self.line_number) \
                from exc_value
Gilles Peskine22fcf1b2021-03-10 01:02:39 +010073
74
class PSAMacroEnumerator:
    """Information about constructors of various PSA Crypto types.

    This includes macro names as well as information about their arguments
    when applicable.

    This class only provides ways to enumerate expressions that evaluate to
    values of the covered types. Derived classes are expected to populate
    the set of known constructors of each kind, as well as populate
    `self.arguments_for` for arguments that are not of a kind that is
    enumerated here.
    """
    #pylint: disable=too-many-instance-attributes

    def __init__(self) -> None:
        """Set up an empty set of known constructor macros.
        """
        self.statuses = set() #type: Set[str]
        self.lifetimes = set() #type: Set[str]
        self.locations = set() #type: Set[str]
        self.persistence_levels = set() #type: Set[str]
        self.algorithms = set() #type: Set[str]
        self.ecc_curves = set() #type: Set[str]
        self.dh_groups = set() #type: Set[str]
        self.key_types = set() #type: Set[str]
        self.key_usage_flags = set() #type: Set[str]
        self.hash_algorithms = set() #type: Set[str]
        self.mac_algorithms = set() #type: Set[str]
        self.ka_algorithms = set() #type: Set[str]
        self.kdf_algorithms = set() #type: Set[str]
        self.aead_algorithms = set() #type: Set[str]
        self.sign_algorithms = set() #type: Set[str]
        # macro name -> list of argument names
        self.argspecs = {} #type: Dict[str, List[str]]
        # argument name -> list of values
        self.arguments_for = {
            'mac_length': [],
            'min_mac_length': [],
            'tag_length': [],
            'min_tag_length': [],
        } #type: Dict[str, List[str]]
        # Whether to include intermediate macros in enumerations. Intermediate
        # macros serve as category headers and are not valid values of their
        # type. See `is_internal_name`.
        # Always false in this class, may be set to true in derived classes.
        self.include_intermediate = False

    def is_internal_name(self, name: str) -> bool:
        """Whether this is an internal macro. Internal macros will be skipped.

        Names ending in ``_FLAG`` or ``_MASK`` are always internal. Names
        ending in ``_BASE`` or ``_NONE`` and names containing ``_CATEGORY_``
        are internal unless `self.include_intermediate` is true.
        """
        if not self.include_intermediate:
            if name.endswith(('_BASE', '_NONE')):
                return True
            if '_CATEGORY_' in name:
                return True
        return name.endswith(('_FLAG', '_MASK'))

    def gather_arguments(self) -> None:
        """Populate the list of values for macro arguments.

        Call this after parsing all the inputs.
        """
        self.arguments_for['hash_alg'] = sorted(self.hash_algorithms)
        self.arguments_for['mac_alg'] = sorted(self.mac_algorithms)
        self.arguments_for['ka_alg'] = sorted(self.ka_algorithms)
        self.arguments_for['kdf_alg'] = sorted(self.kdf_algorithms)
        self.arguments_for['aead_alg'] = sorted(self.aead_algorithms)
        self.arguments_for['sign_alg'] = sorted(self.sign_algorithms)
        self.arguments_for['curve'] = sorted(self.ecc_curves)
        self.arguments_for['group'] = sorted(self.dh_groups)
        self.arguments_for['persistence'] = sorted(self.persistence_levels)
        self.arguments_for['location'] = sorted(self.locations)
        self.arguments_for['lifetime'] = sorted(self.lifetimes)

    @staticmethod
    def _format_arguments(name: str, arguments: Iterable[str]) -> str:
        """Format a macro call with arguments.

        The resulting format is consistent with
        `InputsForTest.normalize_argument`.
        """
        return name + '(' + ', '.join(arguments) + ')'

    _argument_split_re = re.compile(r' *, *')
    @classmethod
    def _argument_split(cls, arguments: str) -> List[str]:
        """Split a comma-separated argument list, dropping spaces around commas."""
        return cls._argument_split_re.split(arguments)

    def distribute_arguments(self, name: str) -> Iterator[str]:
        """Generate macro calls with each tested argument set.

        If name is a macro without arguments, just yield "name".
        If name is a macro with arguments, yield a series of
        "name(arg1,...,argN)" where each argument takes each possible
        value at least once.

        If an error occurs (for example an argument name with no entry in
        `self.arguments_for`, or an argument with no known value), raise an
        `Exception` that names the macro and is chained to the original
        error.
        """
        try:
            if name not in self.argspecs:
                yield name
                return
            argspec = self.argspecs[name]
            if not argspec:
                yield name + '()'
                return
            argument_lists = [self.arguments_for[arg] for arg in argspec]
            # Start with the first value of every argument, then vary one
            # argument at a time while the others stay at their first value.
            arguments = [values[0] for values in argument_lists]
            yield self._format_arguments(name, arguments)
            # Dear Pylint, enumerate won't work here since we're modifying
            # the array.
            # pylint: disable=consider-using-enumerate
            for i in range(len(arguments)):
                for value in argument_lists[i][1:]:
                    arguments[i] = value
                    yield self._format_arguments(name, arguments)
                arguments[i] = argument_lists[i][0]
        except Exception as e:
            # Catch Exception, not BaseException: GeneratorExit is thrown
            # into this generator at a yield when it is closed before
            # exhaustion, and must not be converted into an error.
            raise Exception('distribute_arguments({})'.format(name)) from e

    def distribute_arguments_without_duplicates(
            self, seen: Set[str], name: str
    ) -> Iterator[str]:
        """Same as `distribute_arguments`, but don't repeat seen results.

        Each yielded result is added to `seen`.
        """
        for result in self.distribute_arguments(name):
            if result not in seen:
                seen.add(result)
                yield result

    def generate_expressions(self, names: Iterable[str]) -> Iterator[str]:
        """Generate expressions covering values constructed from the given names.

        `names` can be any iterable collection of macro names.

        For example:
        * ``generate_expressions(['PSA_ALG_CMAC', 'PSA_ALG_HMAC'])``
          generates ``'PSA_ALG_CMAC'`` as well as ``'PSA_ALG_HMAC(h)'`` for
          every known hash algorithm ``h``.
        * ``macros.generate_expressions(macros.key_types)`` generates all
          key types.
        """
        seen = set() #type: Set[str]
        return itertools.chain.from_iterable(
            self.distribute_arguments_without_duplicates(seen, name)
            for name in names
        )
Gilles Peskine22fcf1b2021-03-10 01:02:39 +0100218
Gilles Peskinee7c44552021-01-25 21:40:45 +0100219
class PSAMacroCollector(PSAMacroEnumerator):
    """Collect PSA crypto macro definitions from C header files.
    """

    def __init__(self, include_intermediate: bool = False) -> None:
        """Set up an object to collect PSA macro definitions.

        Call the read_file method of the constructed object on each header file.

        * include_intermediate: if true, include intermediate macros such as
          PSA_XXX_BASE that do not designate semantic values.
        """
        super().__init__()
        self.include_intermediate = include_intermediate
        # constructor macro name -> name of the corresponding predicate macro
        self.key_types_from_curve = {} #type: Dict[str, str]
        self.key_types_from_group = {} #type: Dict[str, str]
        self.algorithms_from_hash = {} #type: Dict[str, str]

    @staticmethod
    def algorithm_tester(name: str) -> str:
        """The predicate for whether an algorithm is built from the given constructor.

        The given name must be the name of an algorithm constructor of the
        form ``PSA_ALG_xxx`` which is used as ``PSA_ALG_xxx(yyy)`` to build
        an algorithm value. Return the corresponding predicate macro which
        is used as ``predicate(alg)`` to test whether ``alg`` can be built
        as ``PSA_ALG_xxx(yyy)``. The predicate is usually called
        ``PSA_ALG_IS_xxx``.
        """
        prefix = 'PSA_ALG_'
        assert name.startswith(prefix)
        midfix = 'IS_'
        suffix = name[len(prefix):]
        if suffix in ['DSA', 'ECDSA']:
            midfix += 'RANDOMIZED_'
        elif suffix == 'RSA_PSS':
            suffix += '_STANDARD_SALT'
        return prefix + midfix + suffix

    def record_algorithm_subtype(self, name: str, expansion: str) -> None:
        """Record the subtype of an algorithm constructor.

        Given a ``PSA_ALG_xxx`` macro name and its expansion, if the algorithm
        is of a subtype that is tracked in its own set, add it to the relevant
        set.
        """
        # This code is very ad hoc and fragile. It should be replaced by
        # something more robust.
        # NOTE(review): re.match anchors at the start of `name`, so for the
        # PSA_ALG_-prefixed names that read_line passes in, the two name-based
        # branches below cannot match and the classification relies on the
        # expansion patterns. Left as is to preserve behavior; confirm intent.
        if re.match(r'MAC(?:_|\Z)', name):
            self.mac_algorithms.add(name)
        elif re.match(r'KDF(?:_|\Z)', name):
            self.kdf_algorithms.add(name)
        elif re.search(r'0x020000[0-9A-Fa-f]{2}', expansion):
            self.hash_algorithms.add(name)
        elif re.search(r'0x03[0-9A-Fa-f]{6}', expansion):
            self.mac_algorithms.add(name)
        elif re.search(r'0x05[0-9A-Fa-f]{6}', expansion):
            self.aead_algorithms.add(name)
        elif re.search(r'0x09[0-9A-Fa-f]{2}0000', expansion):
            self.ka_algorithms.add(name)
        elif re.search(r'0x08[0-9A-Fa-f]{6}', expansion):
            self.kdf_algorithms.add(name)

    # "#define" followed by a macro name with either no parameters
    # or a single parameter and a non-empty expansion.
    # Grab the macro name in group 1, the parameter name if any in group 2
    # and the expansion in group 3.
    _define_directive_re = re.compile(r'\s*#\s*define\s+(\w+)' +
                                      r'(?:\s+|\((\w+)\)\s*)' +
                                      r'(.+)')
    _deprecated_definition_re = re.compile(r'\s*MBEDTLS_DEPRECATED')

    def read_line(self, line: str) -> None:
        """Parse a C header line and record the PSA identifier it defines if any.

        This function analyzes lines that start with "#define PSA_"
        (up to non-significant whitespace) and skips all non-matching lines.
        """
        # pylint: disable=too-many-branches
        m = self._define_directive_re.match(line)
        if not m:
            return
        name, parameter, expansion = m.groups()
        # Replace C comments in the expansion by a space.
        expansion = re.sub(r'/\*.*?\*/|//.*', r' ', expansion)
        if parameter:
            self.argspecs[name] = [parameter]
        if self._deprecated_definition_re.match(expansion):
            # Skip deprecated values, which are assumed to be
            # backward compatibility aliases that share
            # numerical values with non-deprecated values.
            return
        if self.is_internal_name(name):
            # Macro only to build actual values
            return
        key_type_prefix = 'PSA_KEY_TYPE_'
        if (name.startswith('PSA_ERROR_') or name == 'PSA_SUCCESS') \
           and not parameter:
            self.statuses.add(name)
        elif name.startswith(key_type_prefix) and not parameter:
            self.key_types.add(name)
        elif name.startswith(key_type_prefix) and parameter == 'curve':
            # PSA_KEY_TYPE_xxx(curve) is tested by PSA_KEY_TYPE_IS_xxx.
            self.key_types_from_curve[name] = \
                key_type_prefix + 'IS_' + name[len(key_type_prefix):]
        elif name.startswith(key_type_prefix) and parameter == 'group':
            # PSA_KEY_TYPE_xxx(group) is tested by PSA_KEY_TYPE_IS_xxx.
            self.key_types_from_group[name] = \
                key_type_prefix + 'IS_' + name[len(key_type_prefix):]
        elif name.startswith('PSA_ECC_FAMILY_') and not parameter:
            self.ecc_curves.add(name)
        elif name.startswith('PSA_DH_FAMILY_') and not parameter:
            self.dh_groups.add(name)
        elif name.startswith('PSA_ALG_') and not parameter:
            if name in ['PSA_ALG_ECDSA_BASE',
                        'PSA_ALG_RSA_PKCS1V15_SIGN_BASE']:
                # Ad hoc skipping of duplicate names for some numerical values
                return
            self.algorithms.add(name)
            self.record_algorithm_subtype(name, expansion)
        elif name.startswith('PSA_ALG_') and parameter == 'hash_alg':
            self.algorithms_from_hash[name] = self.algorithm_tester(name)
        elif name.startswith('PSA_KEY_USAGE_') and not parameter:
            self.key_usage_flags.add(name)
        # Any other macro is ignored.

    _nonascii_re = re.compile(rb'[^\x00-\x7f]+')
    _continued_line_re = re.compile(rb'\\\r?\n\Z')
    def read_file(self, header_file: Iterable[bytes]) -> None:
        """Read macro definitions from a C header.

        `header_file` is an iterable of lines as `bytes`, for example a file
        opened in binary mode. Lines ending with a backslash-newline are
        joined with the following line. Non-ASCII bytes are removed before
        each logical line is passed to `read_line`.
        """
        # Use one explicit iterator so that continuation lines consumed by
        # next() are not seen again by the for loop, even if the caller
        # passes a list rather than a file object.
        lines = iter(header_file)
        for line in lines:
            m = self._continued_line_re.search(line)
            while m:
                # If the input ends on a continued line, there is nothing
                # left to splice: just drop the backslash-newline instead
                # of letting StopIteration escape.
                cont = next(lines, b'')
                line = line[:m.start(0)] + cont
                m = self._continued_line_re.search(line)
            line = self._nonascii_re.sub(b'', line).decode('ascii')
            self.read_line(line)
Gilles Peskine3cf3a8e2021-03-30 19:09:05 +0200352
353
class InputsForTest(PSAMacroEnumerator):
    # pylint: disable=too-many-instance-attributes
    """Accumulate information about macros to test.

    This includes macro names as well as information about their arguments
    when applicable.
    """

    def __init__(self) -> None:
        super().__init__()
        # Every PSA_xxx macro name seen by parse_header_line, including
        # the ones that are excluded from the per-type sets.
        self.all_declared = set() #type: Set[str]
        # Identifier prefixes
        self.table_by_prefix = {
            'ERROR': self.statuses,
            'ALG': self.algorithms,
            'ECC_CURVE': self.ecc_curves,
            'DH_GROUP': self.dh_groups,
            'KEY_LIFETIME': self.lifetimes,
            'KEY_LOCATION': self.locations,
            'KEY_PERSISTENCE': self.persistence_levels,
            'KEY_TYPE': self.key_types,
            'KEY_USAGE': self.key_usage_flags,
        } #type: Dict[str, Set[str]]
        # Test functions
        self.table_by_test_function = {
            # Any function ending in _algorithm also gets added to
            # self.algorithms.
            'key_type': [self.key_types],
            'block_cipher_key_type': [self.key_types],
            'stream_cipher_key_type': [self.key_types],
            'ecc_key_family': [self.ecc_curves],
            'ecc_key_types': [self.ecc_curves],
            'dh_key_family': [self.dh_groups],
            'dh_key_types': [self.dh_groups],
            'hash_algorithm': [self.hash_algorithms],
            'mac_algorithm': [self.mac_algorithms],
            'cipher_algorithm': [],
            'hmac_algorithm': [self.mac_algorithms, self.sign_algorithms],
            'aead_algorithm': [self.aead_algorithms],
            'key_derivation_algorithm': [self.kdf_algorithms],
            'key_agreement_algorithm': [self.ka_algorithms],
            'asymmetric_signature_algorithm': [self.sign_algorithms],
            'asymmetric_signature_wildcard': [self.algorithms],
            'asymmetric_encryption_algorithm': [],
            'other_algorithm': [],
            'lifetime': [self.lifetimes],
        } #type: Dict[str, List[Set[str]]]
        mac_lengths = [str(n) for n in [
            1,  # minimum expressible
            4,  # minimum allowed by policy
            13, # an odd size in a plausible range
            14, # an even non-power-of-two size in a plausible range
            16, # same as full size for at least one algorithm
            63, # maximum expressible
        ]]
        self.arguments_for['mac_length'] += mac_lengths
        self.arguments_for['min_mac_length'] += mac_lengths
        aead_lengths = [str(n) for n in [
            1,  # minimum expressible
            4,  # minimum allowed by policy
            13, # an odd size in a plausible range
            14, # an even non-power-of-two size in a plausible range
            16, # same as full size for at least one algorithm
            63, # maximum expressible
        ]]
        self.arguments_for['tag_length'] += aead_lengths
        self.arguments_for['min_tag_length'] += aead_lengths

    def add_numerical_values(self) -> None:
        """Add numerical values that are not supported to the known identifiers."""
        # Sets of names per type
        self.algorithms.add('0xffffffff')
        self.ecc_curves.add('0xff')
        self.dh_groups.add('0xff')
        self.key_types.add('0xffff')
        self.key_usage_flags.add('0x80000000')

        # Hard-coded values for unknown algorithms
        #
        # These have to have values that are correct for their respective
        # PSA_ALG_IS_xxx macros, but are also not currently assigned and are
        # not likely to be assigned in the near future.
        self.hash_algorithms.add('0x020000fe') # 0x020000ff is PSA_ALG_ANY_HASH
        self.mac_algorithms.add('0x03007fff')
        self.ka_algorithms.add('0x09fc0000')
        self.kdf_algorithms.add('0x080000ff')
        # For AEAD algorithms, the only variability is over the tag length,
        # and this only applies to known algorithms, so don't test an
        # unknown algorithm.

    def get_names(self, type_word: str) -> Set[str]:
        """Return the set of known names of values of the given type.

        Raise `KeyError` if `type_word` is not one of the supported types.
        """
        return {
            'status': self.statuses,
            'algorithm': self.algorithms,
            'ecc_curve': self.ecc_curves,
            'dh_group': self.dh_groups,
            'key_type': self.key_types,
            'key_usage': self.key_usage_flags,
        }[type_word]

    # Regex for interesting header lines.
    # Groups: 1=macro name, 2=type, 3=argument list (optional).
    _header_line_re = \
        re.compile(r'#define +' +
                   r'(PSA_((?:(?:DH|ECC|KEY)_)?[A-Z]+)_\w+)' +
                   r'(?:\(([^\n()]*)\))?')
    # Regex of macro names to exclude.
    _excluded_name_re = re.compile(r'_(?:GET|IS|OF)_|_(?:BASE|FLAG|MASK)\Z')
    # Additional excluded macros.
    _excluded_names = {
        # Macros that provide an alternative way to build the same
        # algorithm as another macro.
        'PSA_ALG_AEAD_WITH_DEFAULT_LENGTH_TAG',
        'PSA_ALG_FULL_LENGTH_MAC',
        # Auxiliary macro whose name doesn't fit the usual patterns for
        # auxiliary macros.
        'PSA_ALG_AEAD_WITH_DEFAULT_LENGTH_TAG_CASE',
    }
    def parse_header_line(self, line: str) -> None:
        """Parse a C header line, looking for "#define PSA_xxx".

        Every matching macro name is recorded in `self.all_declared`.
        Unless the name is excluded or internal, it is also added to the
        set selected by its type prefix, and its argument names, if any,
        are recorded in `self.argspecs`.
        """
        m = self._header_line_re.match(line)
        if not m:
            return
        name = m.group(1)
        self.all_declared.add(name)
        if self._excluded_name_re.search(name) or \
           name in self._excluded_names or \
           self.is_internal_name(name):
            return
        dest = self.table_by_prefix.get(m.group(2))
        if dest is None:
            return
        dest.add(name)
        if m.group(3):
            self.argspecs[name] = self._argument_split(m.group(3))

    _nonascii_re = re.compile(rb'[^\x00-\x7f]+') #type: Pattern
    def parse_header(self, filename: str) -> None:
        """Parse a C header file, looking for "#define PSA_xxx"."""
        with read_file_lines(filename, binary=True) as lines:
            for line in lines:
                # Drop non-ASCII bytes so that the line can be decoded.
                line = self._nonascii_re.sub(b'', line).decode('ascii')
                self.parse_header_line(line)

    _macro_identifier_re = re.compile(r'[A-Z]\w+')
    def generate_undeclared_names(self, expr: str) -> Iterator[str]:
        """Yield the identifiers in `expr` that are not in `self.all_declared`.

        Only identifiers that start with an uppercase letter are considered.
        """
        for name in self._macro_identifier_re.findall(expr):
            if name not in self.all_declared:
                yield name

    def accept_test_case_line(self, function: str, argument: str) -> bool:
        """Whether to use the given test case argument.

        Return true, or raise an exception if `argument` contains names that
        are not in `self.all_declared`.
        """
        #pylint: disable=unused-argument
        undeclared = list(self.generate_undeclared_names(argument))
        if undeclared:
            raise Exception('Undeclared names in test case', undeclared)
        return True

    @staticmethod
    def normalize_argument(argument: str) -> str:
        """Normalize whitespace in the given C expression.

        The result uses the same whitespace as
        `PSAMacroEnumerator.distribute_arguments`.
        """
        # Remove all spaces, then put exactly one space after each comma.
        return argument.replace(' ', '').replace(',', ', ')

    def add_test_case_line(self, function: str, argument: str) -> None:
        """Parse a test case data line, looking for algorithm metadata tests."""
        sets = [] #type: List[Set[str]]
        if function.endswith('_algorithm'):
            sets.append(self.algorithms)
            if function == 'key_agreement_algorithm' and \
               argument.startswith('PSA_ALG_KEY_AGREEMENT('):
                # We only want *raw* key agreement algorithms as such, so
                # exclude ones that are already chained with a KDF.
                # Keep the expression as one to test as an algorithm.
                function = 'other_algorithm'
        sets += self.table_by_test_function[function]
        if self.accept_test_case_line(function, argument):
            for s in sets:
                s.add(self.normalize_argument(argument))

    # Regex matching a *.data line containing a test function call and
    # its arguments. The actual definition is partly positional, but this
    # regex is good enough in practice.
    _test_case_line_re = re.compile(r'(?!depends_on:)(\w+):([^\n :][^:\n]*)')
    def parse_test_cases(self, filename: str) -> None:
        """Parse a test case file (*.data), looking for algorithm metadata tests."""
        with read_file_lines(filename) as lines:
            for line in lines:
                m = self._test_case_line_re.match(line)
                if m:
                    self.add_test_case_line(m.group(1), m.group(2))
547 self.add_test_case_line(m.group(1), m.group(2))