"""Collect macro definitions from header files.
"""

# Copyright The Mbed TLS Contributors
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import itertools
import re
from typing import Dict, IO, Iterable, Iterator, List, Optional, Pattern, Set, Tuple, Union


class ReadFileLineException(Exception):
    """Exception that records where in a file a failure happened.

    The exception message has the form ``in FILENAME at LINE_NUMBER``.

    Attributes:
    * filename: name of the file that was being read.
    * line_number: position in the file. This is either a line number or
      a string marker (`read_file_lines` uses ``'entry'`` before the first
      line has been read and ``'exit'`` after the last one).
    """

    def __init__(self, filename: str, line_number: Union[int, str]) -> None:
        message = 'in {} at {}'.format(filename, line_number)
        super().__init__(message)
        self.filename = filename
        self.line_number = line_number


class read_file_lines:
    # Dear Pylint, conventionally, a context manager class name is lowercase.
    # pylint: disable=invalid-name,too-few-public-methods
    """Context manager to read a text file line by line.

    ```
    with read_file_lines(filename) as lines:
        for line in lines:
            process(line)
    ```
    is equivalent to
    ```
    with open(filename, 'r') as input_file:
        for line in input_file:
            process(line)
    ```
    except that if process(line) raises an exception, then the read_file_lines
    snippet annotates the exception with the file name and line number.

    Line numbers are counted from 1. Before the first line has been read
    the position is reported as ``'entry'``, and once the file has been
    exhausted it is reported as ``'exit'``.

    The underlying file is closed when the ``with`` block is left, whether
    normally or through an exception.
    """

    def __init__(self, filename: str, binary: bool = False) -> None:
        """Prepare to read `filename`.

        * filename: path of the file to read.
        * binary: if true, open the file in binary mode, in which case the
          lines are `bytes` objects rather than strings.

        The file is not opened until the context is entered.
        """
        self.filename = filename
        # Open file object; None until the context has been entered.
        self.file = None #type: Optional[IO]
        self.line_number = 'entry' #type: Union[int, str]
        self.generator = None #type: Optional[Iterable[Tuple[int, str]]]
        self.binary = binary

    def __enter__(self) -> 'read_file_lines':
        # Keep a reference to the file object so that __exit__ can close it.
        self.file = open(self.filename, 'rb' if self.binary else 'r')
        # Number lines from 1 so that reported positions match what an
        # editor or a compiler would show.
        self.generator = enumerate(self.file, 1)
        return self

    def __iter__(self) -> Iterator[str]:
        """Yield each line in turn, keeping track of the current line number."""
        assert self.generator is not None
        for line_number, content in self.generator:
            self.line_number = line_number
            yield content
        self.line_number = 'exit'

    def __exit__(self, exc_type, exc_value, exc_traceback) -> None:
        """Close the file and annotate any exception with the file position.

        Raises ReadFileLineException, chained from the original exception,
        if the body of the ``with`` statement raised an exception.
        """
        # Close first so that the file is released even on the error path.
        if self.file is not None:
            self.file.close()
        if exc_type is not None:
            raise ReadFileLineException(self.filename, self.line_number) \
                from exc_value


class PSAMacroEnumerator:
    """Information about constructors of various PSA Crypto types.

    This includes macro names as well as information about their arguments
    when applicable.

    This class only provides ways to enumerate expressions that evaluate to
    values of the covered types. Derived classes are expected to populate
    the set of known constructors of each kind, as well as populate
    `self.arguments_for` for arguments that are not of a kind that is
    enumerated here.
    """

    def __init__(self) -> None:
        """Set up an empty set of known constructor macros.
        """
        self.statuses = set() #type: Set[str]
        self.algorithms = set() #type: Set[str]
        self.ecc_curves = set() #type: Set[str]
        self.dh_groups = set() #type: Set[str]
        self.key_types = set() #type: Set[str]
        self.key_usage_flags = set() #type: Set[str]
        self.hash_algorithms = set() #type: Set[str]
        self.mac_algorithms = set() #type: Set[str]
        self.ka_algorithms = set() #type: Set[str]
        self.kdf_algorithms = set() #type: Set[str]
        self.pake_algorithms = set() #type: Set[str]
        self.aead_algorithms = set() #type: Set[str]
        # macro name -> list of argument names
        self.argspecs = {} #type: Dict[str, List[str]]
        # argument name -> list of values
        self.arguments_for = {
            'mac_length': [],
            'min_mac_length': [],
            'tag_length': [],
            'min_tag_length': [],
        } #type: Dict[str, List[str]]
        # Whether to include intermediate macros in enumerations. Intermediate
        # macros serve as category headers and are not valid values of their
        # type. See `is_internal_name`.
        # Always false in this class, may be set to true in derived classes.
        self.include_intermediate = False

    def is_internal_name(self, name: str) -> bool:
        """Whether this is an internal macro. Internal macros will be skipped.

        Names ending in ``_FLAG`` or ``_MASK`` are always internal. Names
        ending in ``_BASE`` or ``_NONE``, and names containing
        ``_CATEGORY_``, are internal unless `self.include_intermediate`
        is true.
        """
        if not self.include_intermediate:
            if name.endswith(('_BASE', '_NONE')):
                return True
            if '_CATEGORY_' in name:
                return True
        return name.endswith(('_FLAG', '_MASK'))

    def gather_arguments(self) -> None:
        """Populate the list of values for macro arguments.

        Call this after parsing all the inputs.
        """
        self.arguments_for['hash_alg'] = sorted(self.hash_algorithms)
        self.arguments_for['mac_alg'] = sorted(self.mac_algorithms)
        self.arguments_for['ka_alg'] = sorted(self.ka_algorithms)
        self.arguments_for['kdf_alg'] = sorted(self.kdf_algorithms)
        self.arguments_for['aead_alg'] = sorted(self.aead_algorithms)
        self.arguments_for['curve'] = sorted(self.ecc_curves)
        self.arguments_for['group'] = sorted(self.dh_groups)

    @staticmethod
    def _format_arguments(name: str, arguments: Iterable[str]) -> str:
        """Format a macro call with arguments.

        The resulting format is consistent with
        `InputsForTest.normalize_argument`.
        """
        return name + '(' + ', '.join(arguments) + ')'

    _argument_split_re = re.compile(r' *, *')
    @classmethod
    def _argument_split(cls, arguments: str) -> List[str]:
        """Split a comma-separated argument list, dropping spaces around commas."""
        return cls._argument_split_re.split(arguments)

    def distribute_arguments(self, name: str) -> Iterator[str]:
        """Generate macro calls with each tested argument set.

        If name is a macro without arguments, just yield "name".
        If name is a macro with arguments, yield a series of
        "name(arg1,...,argN)" where each argument takes each possible
        value at least once.

        The first call uses the first known value of every argument. Then,
        for each argument in turn, the remaining values of that argument are
        tried while all the other arguments keep their first value.

        Any error (for example an argument with no known value) is reported
        as an `Exception` whose message names the macro, chained from the
        original exception.
        """
        try:
            if name not in self.argspecs:
                yield name
                return
            argspec = self.argspecs[name]
            if not argspec:
                yield name + '()'
                return
            argument_lists = [self.arguments_for[arg] for arg in argspec]
            arguments = [values[0] for values in argument_lists]
            yield self._format_arguments(name, arguments)
            for i, values in enumerate(argument_lists):
                for value in values[1:]:
                    arguments[i] = value
                    yield self._format_arguments(name, arguments)
                # Put back this argument's own first value (not the first
                # value of argument 0) before varying the next argument.
                arguments[i] = values[0]
        except Exception as e:
            # Only wrap genuine errors: GeneratorExit (raised when the
            # consumer closes the generator early) and KeyboardInterrupt
            # derive from BaseException and must propagate unchanged.
            raise Exception('distribute_arguments({})'.format(name)) from e

    def distribute_arguments_without_duplicates(
            self, seen: Set[str], name: str
    ) -> Iterator[str]:
        """Same as `distribute_arguments`, but don't repeat seen results.

        Each yielded result is added to `seen`.
        """
        for result in self.distribute_arguments(name):
            if result not in seen:
                seen.add(result)
                yield result

    def generate_expressions(self, names: Iterable[str]) -> Iterator[str]:
        """Generate expressions covering values constructed from the given names.

        `names` can be any iterable collection of macro names.

        For example:
        * ``generate_expressions(['PSA_ALG_CMAC', 'PSA_ALG_HMAC'])``
          generates ``'PSA_ALG_CMAC'`` as well as ``'PSA_ALG_HMAC(h)'`` for
          every known hash algorithm ``h``.
        * ``macros.generate_expressions(macros.key_types)`` generates all
          key types.

        Each expression is generated at most once.
        """
        seen = set() #type: Set[str]
        # `names` is consumed immediately (argument unpacking), so the caller
        # may modify the collection before the result is iterated over.
        return itertools.chain(*(
            self.distribute_arguments_without_duplicates(seen, name)
            for name in names
        ))


class PSAMacroCollector(PSAMacroEnumerator):
    """Collect PSA crypto macro definitions from C header files.
    """

    def __init__(self, include_intermediate: bool = False) -> None:
        """Set up an object to collect PSA macro definitions.

        Call the read_file method of the constructed object on each header file.

        * include_intermediate: if true, include intermediate macros such as
          PSA_XXX_BASE that do not designate semantic values.
        """
        super().__init__()
        self.include_intermediate = include_intermediate
        # Each of the following maps a constructor macro name to the name of
        # the corresponding PSA_xxx_IS_yyy tester macro.
        self.key_types_from_curve = {} #type: Dict[str, str]
        self.key_types_from_group = {} #type: Dict[str, str]
        self.algorithms_from_hash = {} #type: Dict[str, str]

    def record_algorithm_subtype(self, name: str, expansion: str) -> None:
        """Record the subtype of an algorithm constructor.

        Given a ``PSA_ALG_xxx`` macro name and its expansion, if the algorithm
        is of a subtype that is tracked in its own set, add it to the relevant
        set.
        """
        # This code is very ad hoc and fragile. It should be replaced by
        # something more robust.
        # NOTE(review): re.match anchors at the start of the string, and
        # read_line only passes names starting with 'PSA_ALG_', so the two
        # name-based tests below cannot succeed for those names. Presumably
        # they were meant to apply to the part after the prefix; left
        # unchanged because fixing them would alter the classification.
        if re.match(r'MAC(?:_|\Z)', name):
            self.mac_algorithms.add(name)
        elif re.match(r'KDF(?:_|\Z)', name):
            self.kdf_algorithms.add(name)
        elif re.search(r'0x020000[0-9A-Fa-f]{2}', expansion):
            self.hash_algorithms.add(name)
        elif re.search(r'0x03[0-9A-Fa-f]{6}', expansion):
            self.mac_algorithms.add(name)
        elif re.search(r'0x05[0-9A-Fa-f]{6}', expansion):
            self.aead_algorithms.add(name)
        elif re.search(r'0x09[0-9A-Fa-f]{2}0000', expansion):
            self.ka_algorithms.add(name)
        elif re.search(r'0x08[0-9A-Fa-f]{6}', expansion):
            self.kdf_algorithms.add(name)

    # "#define" followed by a macro name with either no parameters
    # or a single parameter and a non-empty expansion.
    # Grab the macro name in group 1, the parameter name if any in group 2
    # and the expansion in group 3.
    _define_directive_re = re.compile(r'\s*#\s*define\s+(\w+)' +
                                      r'(?:\s+|\((\w+)\)\s*)' +
                                      r'(.+)')
    _deprecated_definition_re = re.compile(r'\s*MBEDTLS_DEPRECATED')

    def read_line(self, line: str) -> None:
        """Parse a C header line and record the PSA identifier it defines if any.
        This function analyzes lines that start with "#define PSA_"
        (up to non-significant whitespace) and skips all non-matching lines.

        If the macro takes a parameter, the parameter name is recorded in
        `self.argspecs` before any of the filtering below takes place.
        """
        # pylint: disable=too-many-branches
        m = re.match(self._define_directive_re, line)
        if not m:
            return
        name, parameter, expansion = m.groups()
        # Blank out comments so that they cannot be mistaken for part of
        # the macro's value.
        expansion = re.sub(r'/\*.*?\*/|//.*', r' ', expansion)
        if parameter:
            self.argspecs[name] = [parameter]
        if re.match(self._deprecated_definition_re, expansion):
            # Skip deprecated values, which are assumed to be
            # backward compatibility aliases that share
            # numerical values with non-deprecated values.
            return
        if self.is_internal_name(name):
            # Macro only to build actual values
            return
        elif (name.startswith('PSA_ERROR_') or name == 'PSA_SUCCESS') \
           and not parameter:
            self.statuses.add(name)
        elif name.startswith('PSA_KEY_TYPE_') and not parameter:
            self.key_types.add(name)
        elif name.startswith('PSA_KEY_TYPE_') and parameter == 'curve':
            # 13 == len('PSA_KEY_TYPE_'): build PSA_KEY_TYPE_IS_xxx.
            self.key_types_from_curve[name] = name[:13] + 'IS_' + name[13:]
        elif name.startswith('PSA_KEY_TYPE_') and parameter == 'group':
            self.key_types_from_group[name] = name[:13] + 'IS_' + name[13:]
        elif name.startswith('PSA_ECC_FAMILY_') and not parameter:
            self.ecc_curves.add(name)
        elif name.startswith('PSA_DH_FAMILY_') and not parameter:
            self.dh_groups.add(name)
        elif name.startswith('PSA_ALG_') and not parameter:
            if name in ['PSA_ALG_ECDSA_BASE',
                        'PSA_ALG_RSA_PKCS1V15_SIGN_BASE']:
                # Ad hoc skipping of duplicate names for some numerical values
                return
            self.algorithms.add(name)
            self.record_algorithm_subtype(name, expansion)
        elif name.startswith('PSA_ALG_') and parameter == 'hash_alg':
            # 8 == len('PSA_ALG_'): build PSA_ALG_IS_xxx.
            if name in ['PSA_ALG_DSA', 'PSA_ALG_ECDSA']:
                # A naming irregularity
                tester = name[:8] + 'IS_RANDOMIZED_' + name[8:]
            else:
                tester = name[:8] + 'IS_' + name[8:]
            self.algorithms_from_hash[name] = tester
        elif name.startswith('PSA_KEY_USAGE_') and not parameter:
            self.key_usage_flags.add(name)
        else:
            # Other macro without parameter
            return

    _nonascii_re = re.compile(rb'[^\x00-\x7f]+')
    _continued_line_re = re.compile(rb'\\\r?\n\Z')
    def read_file(self, header_file: Iterable[bytes]) -> None:
        """Parse the lines of a C header and record the macros they define.

        * header_file: an iterable of lines as `bytes`, typically a file
          opened in binary mode.

        Lines ending with a backslash are joined with the following line
        before being parsed. Non-ASCII bytes are removed. Each resulting
        logical line is passed to `read_line`.
        """
        # Use one explicit iterator for both the loop and the continuation
        # lookahead, so that any iterable (not only a file object) works.
        lines = iter(header_file)
        for line in lines:
            m = re.search(self._continued_line_re, line)
            while m:
                cont = next(lines, None)
                if cont is None:
                    # The input ends with a dangling continuation: parse
                    # what we have rather than leaking StopIteration.
                    line = line[:m.start(0)]
                    break
                line = line[:m.start(0)] + cont
                m = re.search(self._continued_line_re, line)
            line = re.sub(self._nonascii_re, rb'', line).decode('ascii')
            self.read_line(line)


class InputsForTest(PSAMacroEnumerator):
    # pylint: disable=too-many-instance-attributes
    """Accumulate information about macros to test.

    This includes macro names as well as information about their arguments
    when applicable.
    """

    def __init__(self) -> None:
        super().__init__()
        # Every macro name seen by parse_header_line, including excluded ones.
        self.all_declared = set() #type: Set[str]
        # Identifier prefixes
        # The keys are matched against the type part of a macro name as
        # captured by group 2 of _header_line_re.
        # NOTE(review): for PSA_ECC_FAMILY_xxx and PSA_DH_FAMILY_xxx that
        # group is 'ECC_FAMILY' / 'DH_FAMILY', which have no entry here, so
        # such header macros are not recorded by parse_header_line; confirm
        # that this is intended.
        self.table_by_prefix = {
            'ERROR': self.statuses,
            'ALG': self.algorithms,
            'ECC_CURVE': self.ecc_curves,
            'DH_GROUP': self.dh_groups,
            'KEY_TYPE': self.key_types,
            'KEY_USAGE': self.key_usage_flags,
        } #type: Dict[str, Set[str]]
        # Test functions
        self.table_by_test_function = {
            # Any function ending in _algorithm also gets added to
            # self.algorithms.
            'key_type': [self.key_types],
            'block_cipher_key_type': [self.key_types],
            'stream_cipher_key_type': [self.key_types],
            'ecc_key_family': [self.ecc_curves],
            'ecc_key_types': [self.ecc_curves],
            'dh_key_family': [self.dh_groups],
            'dh_key_types': [self.dh_groups],
            'hash_algorithm': [self.hash_algorithms],
            'mac_algorithm': [self.mac_algorithms],
            'cipher_algorithm': [],
            'hmac_algorithm': [self.mac_algorithms],
            'aead_algorithm': [self.aead_algorithms],
            'key_derivation_algorithm': [self.kdf_algorithms],
            'key_agreement_algorithm': [self.ka_algorithms],
            'asymmetric_signature_algorithm': [],
            'asymmetric_signature_wildcard': [self.algorithms],
            'asymmetric_encryption_algorithm': [],
            'pake_algorithm': [self.pake_algorithms],
            'other_algorithm': [],
        } #type: Dict[str, List[Set[str]]]
        self.arguments_for['mac_length'] += ['1', '63']
        self.arguments_for['min_mac_length'] += ['1', '63']
        self.arguments_for['tag_length'] += ['1', '63']
        self.arguments_for['min_tag_length'] += ['1', '63']

    def add_numerical_values(self) -> None:
        """Add numerical values that are not supported to the known identifiers."""
        # Sets of names per type
        self.algorithms.add('0xffffffff')
        self.ecc_curves.add('0xff')
        self.dh_groups.add('0xff')
        self.key_types.add('0xffff')
        self.key_usage_flags.add('0x80000000')

        # Hard-coded values for unknown algorithms
        #
        # These have to have values that are correct for their respective
        # PSA_ALG_IS_xxx macros, but are also not currently assigned and are
        # not likely to be assigned in the near future.
        self.hash_algorithms.add('0x020000fe') # 0x020000ff is PSA_ALG_ANY_HASH
        self.mac_algorithms.add('0x03007fff')
        self.ka_algorithms.add('0x09fc0000')
        self.kdf_algorithms.add('0x080000ff')
        self.pake_algorithms.add('0x0a0000ff')
        # For AEAD algorithms, the only variability is over the tag length,
        # and this only applies to known algorithms, so don't test an
        # unknown algorithm.

    def get_names(self, type_word: str) -> Set[str]:
        """Return the set of known names of values of the given type.

        Raises KeyError if `type_word` is not one of the known type words.
        """
        return {
            'status': self.statuses,
            'algorithm': self.algorithms,
            'ecc_curve': self.ecc_curves,
            'dh_group': self.dh_groups,
            'key_type': self.key_types,
            'key_usage': self.key_usage_flags,
        }[type_word]

    # Regex for interesting header lines.
    # Groups: 1=macro name, 2=type, 3=argument list (optional).
    _header_line_re = \
        re.compile(r'#define +' +
                   r'(PSA_((?:(?:DH|ECC|KEY)_)?[A-Z]+)_\w+)' +
                   r'(?:\(([^\n()]*)\))?')
    # Regex of macro names to exclude.
    _excluded_name_re = re.compile(r'_(?:GET|IS|OF)_|_(?:BASE|FLAG|MASK)\Z')
    # Additional excluded macros.
    _excluded_names = {
        # Macros that provide an alternative way to build the same
        # algorithm as another macro.
        'PSA_ALG_AEAD_WITH_DEFAULT_LENGTH_TAG',
        'PSA_ALG_FULL_LENGTH_MAC',
        # Auxiliary macro whose name doesn't fit the usual patterns for
        # auxiliary macros.
        'PSA_ALG_AEAD_WITH_DEFAULT_LENGTH_TAG_CASE',
    }
    def parse_header_line(self, line: str) -> None:
        """Parse a C header line, looking for "#define PSA_xxx".

        Every matching macro name is recorded in `self.all_declared`.
        Unless the name is excluded, it is also added to the set selected
        by its type prefix, and its argument names, if any, are recorded in
        `self.argspecs`.
        """
        m = self._header_line_re.match(line)
        if not m:
            return
        name = m.group(1)
        self.all_declared.add(name)
        if self._excluded_name_re.search(name) or \
           name in self._excluded_names or \
           self.is_internal_name(name):
            return
        dest = self.table_by_prefix.get(m.group(2))
        if dest is None:
            return
        dest.add(name)
        if m.group(3):
            self.argspecs[name] = self._argument_split(m.group(3))

    _nonascii_re = re.compile(rb'[^\x00-\x7f]+') #type: Pattern
    def parse_header(self, filename: str) -> None:
        """Parse a C header file, looking for "#define PSA_xxx"."""
        # Read as bytes and drop non-ASCII bytes so that the header's
        # encoding does not matter.
        with read_file_lines(filename, binary=True) as lines:
            for line in lines:
                line = re.sub(self._nonascii_re, rb'', line).decode('ascii')
                self.parse_header_line(line)

    _macro_identifier_re = re.compile(r'[A-Z]\w+')
    def generate_undeclared_names(self, expr: str) -> Iterable[str]:
        """Yield the identifiers in `expr` that are not in `self.all_declared`.

        An identifier here is an uppercase letter followed by one or more
        word characters.
        """
        for name in self._macro_identifier_re.findall(expr):
            if name not in self.all_declared:
                yield name

    def accept_test_case_line(self, function: str, argument: str) -> bool:
        """Check that `argument` only uses declared macro names.

        Return True if so. Raise an exception listing the undeclared names
        otherwise.
        """
        #pylint: disable=unused-argument
        undeclared = list(self.generate_undeclared_names(argument))
        if undeclared:
            raise Exception('Undeclared names in test case', undeclared)
        return True

    @staticmethod
    def normalize_argument(argument: str) -> str:
        """Normalize whitespace in the given C expression.

        The result uses the same whitespace as
        `PSAMacroEnumerator.distribute_arguments`.
        """
        # Remove every space, then put exactly one space after each comma.
        return argument.replace(' ', '').replace(',', ', ')

    def add_test_case_line(self, function: str, argument: str) -> None:
        """Parse a test case data line, looking for algorithm metadata tests.

        Raises KeyError if `function` is not a known test function.
        """
        sets = [] #type: List[Set[str]]
        if function.endswith('_algorithm'):
            sets.append(self.algorithms)
            if function == 'key_agreement_algorithm' and \
               argument.startswith('PSA_ALG_KEY_AGREEMENT('):
                # We only want *raw* key agreement algorithms as such, so
                # exclude ones that are already chained with a KDF.
                # Keep the expression as one to test as an algorithm.
                function = 'other_algorithm'
        sets += self.table_by_test_function[function]
        if self.accept_test_case_line(function, argument):
            for s in sets:
                s.add(self.normalize_argument(argument))

    # Regex matching a *.data line containing a test function call and
    # its arguments. The actual definition is partly positional, but this
    # regex is good enough in practice.
    _test_case_line_re = re.compile(r'(?!depends_on:)(\w+):([^\n :][^:\n]*)')
    def parse_test_cases(self, filename: str) -> None:
        """Parse a test case file (*.data), looking for algorithm metadata tests."""
        with read_file_lines(filename) as lines:
            for line in lines:
                m = self._test_case_line_re.match(line)
                if m:
                    self.add_test_case_line(m.group(1), m.group(2))