Python module to parse function declarations from a header file
Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
diff --git a/scripts/mbedtls_dev/c_parsing_helper.py b/scripts/mbedtls_dev/c_parsing_helper.py
new file mode 100644
index 0000000..3bb6f04
--- /dev/null
+++ b/scripts/mbedtls_dev/c_parsing_helper.py
@@ -0,0 +1,127 @@
+"""Helper functions to parse C code in heavily constrained scenarios.
+
+Currently supported functionality:
+
+* read_function_declarations: read function declarations from a header file.
+"""
+
+# Copyright The Mbed TLS Contributors
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+
+import re
+from typing import Dict, Iterable, Iterator, List, Optional, Tuple
+
+
+class ArgumentInfo:
+    """One C function argument: its type, optional name and array suffix.
+
+    ``name`` is None for an unnamed argument; ``suffix`` is '' if absent."""
+    #pylint: disable=too-few-public-methods
+
+    _KEYWORDS = ['const', 'register', 'restrict',
+                 'int', 'long', 'short', 'signed', 'unsigned']
+    # The \b closing the lookahead makes it reject whole keywords only, so a
+    # name that merely starts with one (e.g. "interval") is still a name.
+    _DECLARATION_RE = re.compile(
+        r'(?P<type>\w[\w\s*]*?)\s*' +
+        r'(?!(?:' + r'|'.join(_KEYWORDS) + r')\b)(?P<name>\b\w+\b)?' +
+        r'\s*(?P<suffix>\[[^][]*\])?\Z', re.A | re.S)
+
+    @classmethod
+    def normalize_type(cls, typ: str) -> str:
+        """Collapse whitespace runs and put one space before each ``*``."""
+        return re.sub(r'\s*\*', r' *', re.sub(r'\s+', r' ', typ))
+
+    def __init__(self, decl: str) -> None:
+        """Parse ``decl``; raise ValueError if it is not recognized."""
+        self.decl = decl.strip()
+        m = self._DECLARATION_RE.match(self.decl)
+        if not m:
+            raise ValueError(self.decl)
+        self.type = self.normalize_type(m.group('type')) #type: str
+        self.name = m.group('name') #type: Optional[str]
+        self.suffix = m.group('suffix') or '' #type: str
+
+
+class FunctionInfo:
+    """Description of one function declaration found in a C header.
+
+    ``qualifiers`` is kept as a frozenset, ``arguments`` as ArgumentInfo."""
+    #pylint: disable=too-few-public-methods
+
+    # Matches text that ends with the word "void" (spaces may follow it).
+    VOID_RE = re.compile(r'\s*\bvoid\s*\Z', re.A)
+
+    def __init__(self, #pylint: disable=too-many-arguments
+                 filename: str, line_number: int,
+                 qualifiers: Iterable[str], return_type: str,
+                 name: str, arguments: List[str]) -> None:
+        """Store the fields; each argument string is parsed on the spot."""
+        self.name = name
+        self.return_type = return_type
+        self.filename = filename
+        self.line_number = line_number
+        self.qualifiers = frozenset(qualifiers)
+        self.arguments = list(map(ArgumentInfo, arguments))
+
+    def returns_void(self) -> bool:
+        """Return True if the return type ends with ``void``."""
+        return self.VOID_RE.search(self.return_type) is not None
+
+
+# Match one C comment: a // comment up to (not including) the end of its
+# line, or a /*...*/ comment. Matching both kinds in one pattern means that
+# a // inside a /*...*/ comment (or vice versa) is handled correctly.
+_C_COMMENT_RE = re.compile(r'//[^\n]*|/\*.*?\*/', re.S)
+_NOT_NEWLINES_RE = re.compile(r'[^\n]+')
+
+def read_logical_lines(filename: str) -> Iterator[Tuple[int, str]]:
+    """Read logical lines from a file, with comments stripped.
+
+    Yield (number of first physical line, text) for each logical line: one
+    or more physical lines joined by newlines, with balanced parentheses.
+    """
+    with open(filename, encoding='utf-8') as inp:
+        content = inp.read()
+    # Strip comments, but keep newlines for line numbering
+    content = _C_COMMENT_RE.sub(
+        lambda m: _NOT_NEWLINES_RE.sub("", m.group(0)), content)
+    lines = enumerate(content.splitlines(), 1)
+    for line_number, line in lines:
+        # Extend the line until its parentheses balance. On unbalanced input,
+        # stop at end of file instead of leaking StopIteration (PEP 479).
+        paren_level = line.count('(') - line.count(')')
+        while paren_level > 0:
+            _, more = next(lines, (0, None))
+            if more is None:
+                break
+            paren_level += more.count('(') - more.count(')')
+            line += '\n' + more
+        yield line_number, line
+
+# Match a function declaration ending in ";" (for use with match() on a
+# logical line): optional qualifiers, return type, name, argument list.
+_C_FUNCTION_DECLARATION_RE = re.compile(
+    r'(?P<qualifiers>(?:(?:extern|inline|static)\b\s*)*)'
+    r'(?P<return_type>\w[\w\s*]*?)\s*\b(?P<name>\w+)'
+    r'\s*\((?P<arguments>.*)\)\s*;', re.A | re.S)
+
+def read_function_declarations(functions: Dict[str, FunctionInfo],
+                               filename: str) -> None:
+    """Collect function declarations from a C header file.
+
+    Store each one in ``functions`` under its name, replacing any old entry.
+    """
+    for line_number, line in read_logical_lines(filename):
+        m = _C_FUNCTION_DECLARATION_RE.match(line)
+        if not m:
+            continue
+        name = m.group('name')
+        arguments = m.group('arguments').split(',')
+        # "(void)" and "()" both mean no arguments; don't parse '' as one.
+        if len(arguments) == 1 and (not arguments[0].strip() or
+                                    FunctionInfo.VOID_RE.match(arguments[0])):
+            arguments = []
+        functions[name] = FunctionInfo(
+            filename, line_number, m.group('qualifiers').split(),
+            m.group('return_type'), name, arguments)