feat(tools/cbmc): add script to check CBMC results
This commit adds a Python script that compares the CBMC results against the
baseline summaries committed in the source tree. If there is a significant
difference, the script returns an error.
A call to the script is added to the CheckCBMC.cmake file. If the script
returns an error, its output is printed to the console; however, the
build is not interrupted.
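
The script can also be invoked by hand, for example (placeholder paths;
the actual locations are computed by CMake):

    python3 tools/cbmc/compare_summary.py \
        tools/cbmc/testbenches_results/BASELINE.<suffix> \
        <result-dir>/<summary-file>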
Change-Id: I50e7b9a570c1d30e03def630c6541d63536bd1c1
Signed-off-by: Mate Toth-Pal <mate.toth-pal@arm.com>
diff --git a/tools/cbmc/CheckCBMC.cmake b/tools/cbmc/CheckCBMC.cmake
index 251f541..4eecfdb 100644
--- a/tools/cbmc/CheckCBMC.cmake
+++ b/tools/cbmc/CheckCBMC.cmake
@@ -8,6 +8,13 @@
find_program(RMM_CBMC_PATH "cbmc"
DOC "Path to cbmc.")
+find_package(Python3 REQUIRED)
+find_program(CHECK_CBMC_SUMMARY_EXECUTABLE "compare_summary.py"
+ PATHS ${CMAKE_SOURCE_DIR}
+ PATH_SUFFIXES tools/cbmc
+ DOC "Path to compare_summary.py"
+ )
+
#
# Configure and execute CBMC
#
@@ -193,3 +200,21 @@
endforeach()
message(STATUS "Result in ${RMM_TESTBENCH_RESULT_DIR}")
+
+execute_process(
+ WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
+    COMMAND ${Python3_EXECUTABLE} ${CHECK_CBMC_SUMMARY_EXECUTABLE}
+ ${CMAKE_SOURCE_DIR}/tools/cbmc/testbenches_results/BASELINE.${CBMC_RESULT_FILE_SUFFIX}
+ ${RMM_TESTBENCH_RESULT_DIR}/${SUMMARY_FILE}
+ OUTPUT_VARIABLE CHECK_SUMMARY_OUTPUT
+ ERROR_VARIABLE CHECK_SUMMARY_ERROR
+ RESULT_VARIABLE CHECK_SUMMARY_RC
+ OUTPUT_STRIP_TRAILING_WHITESPACE
+ )
+
+if(NOT CHECK_SUMMARY_RC EQUAL 0)
+    message(WARNING
+        "cbmc-${CBMC_RESULT_FILE_SUFFIX}: FAILED\n${CHECK_SUMMARY_ERROR}")
+else()
+ message(STATUS "cbmc-${CBMC_RESULT_FILE_SUFFIX}: PASSED")
+endif()
diff --git a/tools/cbmc/compare_summary.py b/tools/cbmc/compare_summary.py
new file mode 100755
index 0000000..ca4e4df
--- /dev/null
+++ b/tools/cbmc/compare_summary.py
@@ -0,0 +1,268 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: BSD-3-Clause
+# SPDX-FileCopyrightText: Copyright TF-RMM Contributors.
+#
+
+"""
+This script is used to compare the summary of the CBMC analysis with the
+baseline. The script prints an error message and returns error code if there is
+a parsing error, or the change in the summary is not compatible with the baseline.
+In the latter case, either the baseline should be updated, or the RMM code or the
+CBMC testbench needs to be fixed.
+
+The script assumes that the summary lines of the `analysis` and the `assert` modes
+have the same format and semantics
+"""
+
+import argparse
+import logging
+import re
+import sys
+
+
+class ParseException(Exception):
+ """
+    An exception of this type is raised in case of a parsing error.
+ """
+
+
+class CompareException(Exception):
+ """
+    An exception of this type is raised when there is a difference between
+    the summaries that needs to be addressed.
+ """
+
+
+re_summary_prefix = re.compile(r"\|\s*([a-z_.]+)\s*\|\s*(\d+)\s+of\s+(\d+)")
+re_separator = re.compile(r"\+-+\+-+\+")
+re_header = re.compile(r"\|\s*[A-Z_]+\s*\|\s*([A-Z_]+)\s*\|")
+re_failed_analysis = re.compile(r"\|\s*([a-z_.]+)\s*\|\s*N\/A\s*\|")
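+# For illustration, the regexes above are written to match summary tables of
+# the following shape (made-up example rows, not taken from an actual run):
+#
+#     +--------------------+----------+
+#     | TESTBENCH          | COVERAGE |
+#     +--------------------+----------+
+#     | tb_granule.c       | 10 of 12 |
+#     | tb_rec.c           | N/A      |
+#     +--------------------+----------+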
+
+
+def parse_summary_file(summary_file_name):
+ """
+ Parse a single summary file.
+
+ Returns a tuple:
+ (summary type as a string, list of summaries)
+
+ Each element in the list of summaries is a tuple of three values:
+ (testbench name, first integer value, second integer value)
+ """
+ logging.debug(f"Parsing file {summary_file_name}")
+ with open(summary_file_name, "r", encoding="utf-8") as file:
+ lines = file.readlines()
+
+ summary_type = None
+ summary_list = []
+
+ for idx, line in enumerate(lines):
+ line = line.strip()
+ if not line:
+ continue
+ m_separator = re_separator.match(line)
+ if m_separator:
+ continue
+ m_summary_prefix = re_summary_prefix.match(line)
+ if m_summary_prefix:
+ logging.debug(f" summary line '{line}'")
+ if not summary_type:
+ raise ParseException(
+ f"Missing summary header in file {summary_file_name}"
+ )
+ summary_list.append(
+ (
+ m_summary_prefix.group(1),
+ int(m_summary_prefix.group(2)),
+ int(m_summary_prefix.group(3)),
+ )
+ )
+ continue
+ m_header = re_header.match(line)
+ if m_header:
+ summary_type = m_header.group(1)
+ logging.debug(f" header line '{line}'")
+ continue
+ m_failed_analysis = re_failed_analysis.match(line)
+ if m_failed_analysis:
+ logging.debug(f" N/A line '{line}'")
+            raise ParseException(
+                f"CBMC analysis failed in {summary_file_name} for {m_failed_analysis.group(1)}. "
+                + "Please fix the RMM/testbench code."
+            )
+ logging.debug(f" Parse failed on line '{line}'")
+ raise ParseException(f"Invalid line in {summary_file_name} at line {idx+1}")
+
+ if not summary_list:
+ raise ParseException(f"No summary in file {summary_file_name}")
+
+ logging.info(f"Summary type of {summary_file_name} is {summary_type}")
+
+ return summary_type, summary_list
+
+
+def compare_coverage_summary(testbench_name, baseline_results, current_results):
+ """
+ Compare two coverage summary lines.
+
+    The results must be tuples of the form:
+    (first integer value, second integer value)
+ """
+ logging.debug(
+ f"Comparing {baseline_results[0]} of {baseline_results[1]} against "
+ + f"{current_results[0]} of {current_results[1]}"
+ )
+ if baseline_results[0] < current_results[0]:
+ raise CompareException(
+ f"The coverage of {testbench_name} increased "
+ + f"({baseline_results[0]}->{current_results[0]}). "
+ + "Please update the baseline for later checks."
+ )
+ if baseline_results[0] > current_results[0]:
+ raise CompareException(
+ f"The coverage of {testbench_name} decreased "
+ + f"({baseline_results[0]}->{current_results[0]}). "
+ + "Please update the baseline if this is acceptable."
+ )
+ if baseline_results[1] != current_results[1]:
+ logging.warning(
+ f"The number of coverage tests in {testbench_name} changed. "
+ + f"({baseline_results[1]}->{current_results[1]}). "
+ + "Please consider updating the baseline."
+ )
+
+
+def compare_assert_summary(testbench_name, baseline_results, current_results):
+ """
+ Compare two assert summary lines.
+
+    The results must be tuples of the form:
+    (first integer value, second integer value)
+ """
+ logging.debug(
+ f"Comparing {baseline_results[0]} of {baseline_results[1]} against "
+ + f"{current_results[0]} of {current_results[1]}"
+ )
+ if baseline_results[0] < current_results[0]:
+ raise CompareException(
+ f"The number of failed asserts in {testbench_name} increased "
+ + f"({baseline_results[0]}->{current_results[0]}). "
+ + "Please update the baseline if this is acceptable."
+ )
+ if baseline_results[0] > current_results[0]:
+ raise CompareException(
+ f"The number of failed asserts in {testbench_name} decreased "
+ + f"({baseline_results[0]}->{current_results[0]}). "
+ + "Please update the baseline for later checks."
+ )
+ # The number of asserts in the code can change frequently, so don't do a check on it.
+
+
+def compare_summary_lists(baseline_list, actual_list, comparator):
+ """
+ Compare two summary lists.
+
+ List items are expected to be in the format of a tuple:
+ (testbench name, first integer value, second integer value)
+ """
+ # TODO: check for duplicated lines
+ baseline = {summary[0]: summary[1:] for summary in baseline_list}
+
+ # It is important to first check the common lines, and only report any
+ # missing or extra testbenches after that. This is to make sure that no
+ # coverage/assert change remains unnoticed due to an update of the baseline
+    # that was triggered by a testbench addition/deletion.
+ actual_extra = {}
+
+ for summary in actual_list:
+ testbench_name = summary[0]
+        if testbench_name not in baseline:
+ actual_extra[testbench_name] = summary[1:]
+ continue
+ comparator(testbench_name, baseline[testbench_name], summary[1:])
+ del baseline[testbench_name]
+ if baseline:
+ raise CompareException(
+ f"{next(iter(baseline))} is missing from the actual result. Please update baseline!"
+ )
+    if actual_extra:
+        raise CompareException(
+            f"{next(iter(actual_extra))} is missing from the baseline. Please update baseline!"
+        )
+
+
+def compare_summary_files(baseline_filename, actual_filename):
+ """
+ Compare two summary files.
+ """
+ base_type, base_summary_list = parse_summary_file(baseline_filename)
+ actual_type, actual_summary_list = parse_summary_file(actual_filename)
+
+ if base_type != actual_type:
+ raise ParseException(
+ f"{baseline_filename} and {actual_filename} have different summary type"
+ )
+
+ comparators = {
+ "COVERAGE": compare_coverage_summary,
+ "ANALYSIS": compare_assert_summary,
+ "ASSERT": compare_assert_summary,
+ }
+
+ if base_type not in comparators:
+ raise ParseException(f"Unknown summary type {base_type}")
+
+ compare_summary_lists(
+ base_summary_list, actual_summary_list, comparators[base_type]
+ )
+
+
+def main():
+ """
+    Main function of the script.
+    """
+    parser = argparse.ArgumentParser(description="Compare CBMC summary files")
+    parser.add_argument(
+        "-v", "--verbosity",
+        type=str.upper,
+        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
+        default="WARNING",
+        help="Set the logging verbosity level.",
+    )
+ parser.add_argument(
+ "baseline",
+ type=str,
+ help="The path of the baseline summary file.",
+ )
+ parser.add_argument(
+ "actual", type=str, help="The path of the current summary file."
+ )
+    args = parser.parse_args()
+    logging.getLogger().setLevel(args.verbosity)
+
+ try:
+ compare_summary_files(args.baseline, args.actual)
+    except ParseException as exc:
+        logging.error("Failed to compare summaries:")
+        logging.error(str(exc))
+        sys.exit(1)
+    except CompareException as exc:
+        logging.error("The results contain significant differences:")
+        logging.error(str(exc))
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+ logging.basicConfig(format="%(message)s", level=logging.WARNING)
+ main()