refactor(build): python generate_test_list script

This change introduces generate_test_list.py which is exactly like
generate_test_list.pl except written in python. This helps remove a
dependency on perl as well as a dependency on perl's libxml bindings.
The only required dependency is python3 and its standard library.

Tested by generating tests_list.c/h for each xml file and each platform
and comparing it against the output from the original script.

Signed-off-by: Jim Ray <jimray@google.com>
Change-Id: If0fa8d0e45bf58ce35081aaeb7a9320dfcefdbf9
diff --git a/Makefile b/Makefile
index 2c31a1a..0e7a3e9 100644
--- a/Makefile
+++ b/Makefile
@@ -525,8 +525,10 @@
 
 $(AUTOGEN_DIR)/tests_list.c $(AUTOGEN_DIR)/tests_list.h: $(AUTOGEN_DIR) ${TESTS_FILE} ${PLAT_TESTS_SKIP_LIST} $(ARCH_TESTS_SKIP_LIST)
 	@echo "  AUTOGEN $@"
-	tools/generate_test_list/generate_test_list.pl $(AUTOGEN_DIR)/tests_list.c \
-		$(AUTOGEN_DIR)/tests_list.h  ${TESTS_FILE} $(PLAT_TESTS_SKIP_LIST) $(ARCH_TESTS_SKIP_LIST)
+	tools/generate_test_list/generate_test_list.py $(AUTOGEN_DIR)/tests_list.c \
+		$(AUTOGEN_DIR)/tests_list.h  ${TESTS_FILE} \
+		--plat-skip-file=$(PLAT_TESTS_SKIP_LIST) \
+		--arch-skip-file=$(ARCH_TESTS_SKIP_LIST)
 ifeq ($(SMC_FUZZING), 1)
 	$(Q)mkdir -p  ${BUILD_PLAT}/smcf
 	dtc ${SMC_FUZZ_DTS} >> ${BUILD_PLAT}/smcf/dtb
diff --git a/docs/getting_started/requirements.rst b/docs/getting_started/requirements.rst
index bea412e..f38964a 100644
--- a/docs/getting_started/requirements.rst
+++ b/docs/getting_started/requirements.rst
@@ -23,7 +23,9 @@
 
 ::
 
-    sudo apt-get install device-tree-compiler build-essential git perl libxml-libxml-perl
+    sudo apt-get install device-tree-compiler build-essential git python3
+
+Note that at least Python 3.9 is required.
 
 Download and install the GNU cross-toolchain from Arm. The TF-A Tests have
 been tested with version 12.2.Rel1 (gcc 12.2):
diff --git a/tools/generate_test_list/generate_test_list.pl b/tools/generate_test_list/generate_test_list.pl
deleted file mode 100755
index 5d6d4b7..0000000
--- a/tools/generate_test_list/generate_test_list.pl
+++ /dev/null
@@ -1,196 +0,0 @@
-#!/usr/bin/env perl
-
-#
-# Copyright (c) 2018-2020, Arm Limited. All rights reserved.
-#
-# SPDX-License-Identifier: BSD-3-Clause
-#
-#
-# Arg0: Name of the C file to generate.
-# Arg1: Name of the header file to generate.
-# Arg2: XML file that contains the list of test suites.
-# Arg3: Text file listing the files to skip. Takes precedence over Arg2 file.
-#
-
-my $TESTLIST_SRC_FILENAME = $ARGV[0];
-my $TESTLIST_HDR_FILENAME = $ARGV[1];
-my $XML_TEST_FILENAME     = $ARGV[2];
-my $SKIPPED_LIST_FILENAME = $ARGV[3];
-my $SKIPPED_LIST_FILENAME2 = $ARGV[4];
-
-use strict;
-use warnings;
-use File::Temp;
-use XML::LibXML;
-
-# Create the source & header files
-open FILE_SRC, ">", $TESTLIST_SRC_FILENAME or die $!;
-open FILE_HDR, ">", $TESTLIST_HDR_FILENAME or die $!;
-
-#
-# Open the test list
-#
-my $doc;
-my $testsuite_elem;
-my $failure_elem;
-
-if (-e $XML_TEST_FILENAME) {
-  my $parser = XML::LibXML->new(expand_entities => 1);
-  $doc = $parser->parse_file($XML_TEST_FILENAME);
-  $parser->process_xincludes($doc);
-} else {
-  exit 1
-}
-
-# We assume if there is a root then it is a 'testsuites' element
-my $root = $doc->documentElement();
-my @all_testcases   = $root->findnodes("//testcase");
-my @all_testsuites  = $root->findnodes("//testsuite");
-
-# Check the validity of the XML file:
-# - A testsuite name must be unique.
-# - A testsuite name must not contain a '/' character.
-# - All test cases belonging to a given testsuite must have unique names.
-for my $testsuite (@all_testsuites) {
-  my $testsuite_name = $testsuite->getAttribute('name');
-  if ($testsuite_name =~ /\//) {
-      print "ERROR: $XML_TEST_FILENAME: Invalid test suite name '$testsuite_name'.\n";
-      print "ERROR: $XML_TEST_FILENAME: Test suite names can't include a '/' character.\n";
-      exit 1;
-  }
-  my @testsuites = $root->findnodes("//testsuite[\@name='$testsuite_name']");
-  if (@testsuites != 1) {
-    print "ERROR: $XML_TEST_FILENAME: Can't have 2 test suites named '$testsuite_name'.\n";
-    exit 1;
-  }
-
-  my @testcases_of_testsuite = $testsuite->findnodes("testcase");
-  for my $testcase (@testcases_of_testsuite) {
-    my $testcase_name = $testcase->getAttribute('name');
-    my @testcases = $testsuite->findnodes("testcase[\@name='$testcase_name']");
-    if (@testcases != 1) {
-      print "ERROR: $XML_TEST_FILENAME: Can't have 2 tests named '$testsuite_name/$testcase_name'.\n";
-      exit 1;
-    }
-  }
-}
-
-#
-# Get the list of tests to skip.
-# For each test to skip, find it in the XML tree and remove its node.
-#
-sub skip_test {
-  my $file = $_[0];
-  if (($file) && (open SKIPPED_FILE, "<", $file)) {
-    my @lines = <SKIPPED_FILE>;
-
-    # Remove the newlines from the end of each line.
-    chomp @lines;
-
-    my $line_no = 0;
-    my $testsuite_name;
-    my $testcase_name;
-    my $index = 0;
-
-    for my $line (@lines) {
-      ++$line_no;
-
-      # Skip empty lines.
-      if ($line =~ /^ *$/) { next; }
-      # Skip comments.
-      if ($line =~ /^#/) { next; }
-
-      ($testsuite_name, $testcase_name) = split('/', $line);
-
-      my @testsuites = $root->findnodes("//testsuite[\@name=\"$testsuite_name\"]");
-      if (!@testsuites) {
-        print "WARNING: $SKIPPED_LIST_FILENAME:$line_no: Test suite '$testsuite_name' doesn't exist or has already been deleted.\n";
-        next;
-      }
-
-      if (!defined $testcase_name) {
-        print "INFO: Testsuite '$testsuite_name' will be skipped.\n";
-        $testsuites[0]->unbindNode();
-        next;
-      }
-
-      my @testcases = $testsuites[0]->findnodes("testcase[\@name=\"$testcase_name\"]");
-      if (!@testcases) {
-        print "WARNING: $SKIPPED_LIST_FILENAME:$line_no: Test case '$testsuite_name/$testcase_name' doesn't exist or has already been deleted.\n";
-        next;
-      }
-
-      print "INFO: Testcase '$testsuite_name/$testcase_name' will be skipped.\n";
-      $testcases[0]->unbindNode();
-    }
-    close (SKIPPED_FILE);
-  }
-}
-
-skip_test($SKIPPED_LIST_FILENAME);
-skip_test($SKIPPED_LIST_FILENAME2);
-@all_testcases = $root->findnodes("//testcase");
-
-#
-# Generate the test function prototypes
-#
-my $testcase_count = 0;
-
-print FILE_SRC "#include \"tftf.h\"\n\n";
-
-for my $testcase (@all_testcases) {
-  my $testcase_function = $testcase->getAttribute('function');
-  $testcase_count++;
-  print FILE_SRC "test_result_t $testcase_function(void);\n";
-}
-
-#
-# Generate the header file.
-#
-print FILE_HDR "#ifndef __TEST_LIST_H__\n";
-print FILE_HDR "#define __TEST_LIST_H__\n\n";
-print FILE_HDR "#define TESTCASE_RESULT_COUNT $testcase_count\n\n";
-print FILE_HDR "#endif\n";
-
-#
-# Generate the lists of testcases
-#
-my $testsuite_index = 0;
-my $testcase_index = 0;
-@all_testsuites  = $root->findnodes("//testsuite");
-for my $testsuite (@all_testsuites) {
-  my $testsuite_name = $testsuite->getAttribute('name');
-  my @testcases = $testsuite->findnodes("//testsuite[\@name='$testsuite_name']//testcase");
-
-  print FILE_SRC "\nconst test_case_t testcases_${testsuite_index}[] = {\n";
-
-  for my $testcase (@testcases) {
-    my $testcase_name = $testcase->getAttribute('name');
-    my $testcase_description = $testcase->getAttribute('description');
-    my $testcase_function = $testcase->getAttribute('function');
-
-    if (!defined($testcase_description)) { $testcase_description = ""; }
-
-    print FILE_SRC "  { $testcase_index, \"$testcase_name\", \"$testcase_description\", $testcase_function },\n";
-
-    $testcase_index++;
-  }
-  print FILE_SRC "  { 0, NULL, NULL, NULL }\n";
-  print FILE_SRC "};\n\n";
-  $testsuite_index++;
-}
-
-#
-# Generate the lists of testsuites
-#
-$testsuite_index = 0;
-print FILE_SRC "const test_suite_t testsuites[] = {\n";
-for my $testsuite (@all_testsuites) {
-  my $testsuite_name = $testsuite->getAttribute('name');
-  my $testsuite_description = $testsuite->getAttribute('description');
-  print FILE_SRC "  { \"$testsuite_name\", \"$testsuite_description\", testcases_${testsuite_index} },\n";
-  $testsuite_index++;
-}
-print FILE_SRC "  { NULL, NULL, NULL }\n";
-print FILE_SRC "};\n";
-
diff --git a/tools/generate_test_list/generate_test_list.py b/tools/generate_test_list/generate_test_list.py
new file mode 100755
index 0000000..ed80dc5
--- /dev/null
+++ b/tools/generate_test_list/generate_test_list.py
@@ -0,0 +1,364 @@
+#!/usr/bin/env python3
+#
+# Copyright (c) 2023 Google LLC. All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+#
+
+"""Generates the same output as generate_test_list.pl, but using python.
+
+Takes an xml file describing a list of testsuites as well as a skip list file
+and outputs a src and header file that refers to those tests.
+"""
+
+# This script was linted and formatted using the following commands:
+# autoflake -ir --remove-all-unused-imports --expand-star-imports \
+#   --remove-duplicate-keys --remove-unused-variables tools/generate_test_list/
+# isort tools/generate_test_list/
+# black tools/generate_test_list/ --line-length 100
+# flake8 tools/generate_test_list/ --max-line-length 100
+
+import argparse
+from dataclasses import dataclass
+import os.path
+import urllib.parse
+import xml.etree.ElementInclude
+from xml.etree.ElementTree import Element
+from xml.etree.ElementTree import TreeBuilder
+import xml.parsers.expat
+
+
+TESTS_LIST_H_TPL_FILENAME = "tests_list.h.tpl"
+TESTCASE_COUNT_TEMPLATE = "{{testcase_count}}"
+
+TESTS_LIST_C_TPL_FILENAME = "tests_list.c.tpl"
+FUNCTION_PROTOTYPES_TEMPLATE = "{{function_prototypes}}"
+TESTCASE_LISTS_TEMPLATE = "{{testcase_lists}}"
+TESTSUITES_LIST_TEMPLATE = "{{testsuites_list}}"
+
+XINCLUDE_INCLUDE = "xi:include"
+
+MAX_EXPANSION_DEPTH = 5
+
+# Intermediate representation classes.
+
+
+@dataclass
+class TestCase:
+    """Class representing a single TFTF test case."""
+
+    name: str
+    function: str
+    description: str = ""
+
+
+@dataclass
+class TestSuite:
+    """Class representing a single TFTF test suite."""
+
+    name: str
+    description: str
+    testcases: list[TestCase]
+
+
+def find_element_with_name_or_return_none(iterable, name: str):
+    """Looks through iterable for an element whose 'name' field matches name."""
+    return next(filter(lambda x: x.name == name, iterable), None)
+
+
+def parse_testsuites_element_into_ir(root: Element) -> list[TestSuite]:
+    """Given the root of a parsed XML file, construct TestSuite objects."""
+    testsuite_xml_elements = root.findall(".//testsuite")
+
+    testsuites = []
+    # Parse into IR
+    for testsuite in testsuite_xml_elements:
+        testcases = []
+        for testcase in testsuite.findall("testcase"):
+            testcases += [
+                TestCase(
+                    testcase.get("name"),
+                    testcase.get("function"),
+                    testcase.get("description", default=""),
+                )
+            ]
+        testsuites += [TestSuite(testsuite.get("name"), testsuite.get("description"), testcases)]
+
+    return testsuites
+
+
+# In order to keep this script standalone (meaning no libraries outside of the
+# standard library), we have to do our own assembling of the XML Elements. This
+# is necessary because python doesn't give us a nice way to support external
+# entity expansion. As such we have to use the low level expat parser and build
+# the tree using TreeBuilder.
+
+
+def parse_xml_no_xinclude_expansion(filename: str) -> Element:
+    """Parse filename into an ElementTree.Element, following external entities."""
+    xml_dir_root = os.path.dirname(filename)
+    with open(filename) as fobj:
+        xml_contents = fobj.read()
+
+    parser = xml.parsers.expat.ParserCreate()
+    parser.SetParamEntityParsing(xml.parsers.expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
+
+    global treebuilder
+    treebuilder = TreeBuilder()
+    global expansion_depth
+    expansion_depth = 0
+
+    def start_element_handler(name: str, attributes):
+        # ElementInclude.include requires that the XInclude namespace is expanded.
+        if name == "xi:include":
+            name = "{http://www.w3.org/2001/XInclude}include"
+        treebuilder.start(name, attributes)
+
+    def end_element_handler(name: str):
+        treebuilder.end(name)
+
+    def external_entity_ref_handler(context, base, systemId, publicId):
+        global expansion_depth
+
+        external_entity_parser = parser.ExternalEntityParserCreate(context, "utf-8")
+        assign_all_parser_callbacks(external_entity_parser)
+        with open(os.path.join(xml_dir_root, systemId)) as fobj:
+            sub_xml_contents = fobj.read()
+            expansion_depth += 1
+            if expansion_depth > MAX_EXPANSION_DEPTH:
+                raise ValueError("Max entity expansion depth reached")
+
+            external_entity_parser.Parse(sub_xml_contents, True)
+            expansion_depth -= 1
+        return 1
+
+    def assign_all_parser_callbacks(p):
+        p.StartElementHandler = start_element_handler
+        p.EndElementHandler = end_element_handler
+        p.ExternalEntityRefHandler = external_entity_ref_handler
+
+    assign_all_parser_callbacks(parser)
+    parser.Parse(xml_contents, True)
+
+    return treebuilder.close()
+
+
+# Older versions of python3 don't support ElementInclude.include's base_url
+# kwarg. This callable class works around this.
+# base_url allows XInclude paths relative to the toplevel XML file to be used.
+class ElementIncludeLoaderAdapter:
+    """Adapts between ElementInclude's loader interface and our XML parser."""
+
+    def __init__(self, base_url: str):
+        self.base_url = base_url
+
+    def __call__(self, href: str, parse: str):
+        if parse != "xml":
+            raise ValueError("'parse' must be 'xml'")
+
+        return parse_xml_no_xinclude_expansion(urllib.parse.urljoin(self.base_url, href))
+
+
+def parse_testsuites_from_file(filename: str) -> list[TestSuite]:
+    """Given an XML file, parse the contents into a List[TestSuite]."""
+    root = parse_xml_no_xinclude_expansion(filename)
+
+    base_url = os.path.abspath(filename)
+    loader = ElementIncludeLoaderAdapter(base_url)
+    xml.etree.ElementInclude.include(root, loader=loader)
+
+    if root.tag == "testsuites":
+        testsuites_xml_elements = [root]
+    elif root.tag == "document":
+        testsuites_xml_elements = root.findall("testsuites")
+    else:
+        raise ValueError(f"Unexpected root tag '{root.tag}' in {filename}")
+
+    testsuites = []
+
+    for testsuites_xml_element in testsuites_xml_elements:
+        testsuites += parse_testsuites_element_into_ir(testsuites_xml_element)
+
+    return testsuites
+
+
+def check_validity_of_names(testsuites: list[TestSuite]):
+    """Raise ValueError on a suite name containing '/' or on any duplicate name.
+
+    Suite names must be unique overall; case names must be unique within a suite.
+    """
+
+    def error(message: str) -> ValueError:
+        # Reads the module-level ``args`` lazily, i.e. only once a check has failed.
+        return ValueError(f"ERROR: {args.xml_test_filename}: {message}")
+
+    seen_suites = set()
+    for ts in testsuites:
+        if "/" in ts.name:
+            raise error(f"Invalid test suite name {ts.name}")
+        if ts.name in seen_suites:
+            raise error(f"Can't have 2 test suites named {ts.name}")
+        seen_suites.add(ts.name)
+        seen_cases = set()
+        for tc in ts.testcases:
+            if tc.name in seen_cases:
+                raise error(f"Can't have 2 tests named {ts.name}/{tc.name}")
+            seen_cases.add(tc.name)
+
+
+def remove_skipped_tests(testsuites: list[TestSuite], skip_tests_filename: str):
+    """Return testsuites with the tests listed in skip_tests_filename removed.
+
+    Lines are 'suite' or 'suite/case'; blank lines and '#' comments are ignored.
+    """
+    with open(skip_tests_filename) as skipped_file:
+        for line_no, raw_line in enumerate(skipped_file, start=1):
+            line = raw_line.strip()
+
+            # Skip empty lines and comments
+            if not line or line[0] == "#":
+                continue
+
+            testsuite_name, _, testcase_name = line.partition("/")
+            testsuite = find_element_with_name_or_return_none(testsuites, testsuite_name)
+            if not testsuite:
+                print(
+                    f"WARNING: {skip_tests_filename}:{line_no}: Test suite "
+                    f"'{testsuite_name}' doesn't exist or has already been deleted."
+                )
+                continue
+
+            if not testcase_name:
+                print(f"INFO: Test suite '{testsuite_name}' will be skipped")
+                testsuites = [ts for ts in testsuites if ts.name != testsuite_name]
+                continue
+
+            testcase = find_element_with_name_or_return_none(testsuite.testcases, testcase_name)
+            if not testcase:
+                print(
+                    f"WARNING: {skip_tests_filename}:{line_no}: Test case "
+                    f"'{testsuite_name}/{testcase_name}' doesn't exist or has already "
+                    "been deleted."
+                )
+                continue
+
+            print(f"INFO: Test case '{testsuite_name}/{testcase_name}' will be skipped.")
+            testsuite.testcases.remove(testcase)
+
+        return testsuites
+
+
+def generate_function_prototypes(testcases: list[TestCase]):
+    """Generates function prototypes for the provided list of testcases."""
+    return [f"test_result_t {t.function}(void);" for t in testcases]
+
+
+def generate_testcase_lists(testsuites: list[TestSuite]):
+    """Generates the lists that enumerate the individual testcases in each testsuite."""
+    testcase_lists_contents = []
+    testcase_index = 0
+    for i, testsuite in enumerate(testsuites):
+        testcase_lists_contents += [f"\nconst test_case_t testcases_{i}[] = {{"]
+        for testcase in testsuite.testcases:
+            testcase_lists_contents += [
+                f'  {{ {testcase_index}, "{testcase.name}", '
+                f'"{testcase.description}", {testcase.function} }},'
+            ]
+            testcase_index += 1
+        testcase_lists_contents += ["  { 0, NULL, NULL, NULL }"]
+        testcase_lists_contents += ["};\n"]
+
+    return testcase_lists_contents
+
+
+def generate_testsuite_lists(testsuites: list[TestSuite]):
+    """Generates the list of testsuites."""
+    testsuites_list_contents = []
+    testsuites_list_contents += ["const test_suite_t testsuites[] = {"]
+    for i, testsuite in enumerate(testsuites):
+        testsuites_list_contents += [
+            f'  {{ "{testsuite.name}", "{testsuite.description}", testcases_{i} }},'
+        ]
+    testsuites_list_contents += ["  { NULL, NULL, NULL }"]
+    testsuites_list_contents += ["};"]
+    return testsuites_list_contents
+
+
+def generate_file_from_template(
+    template_filename: str, output_filename: str, template: dict[str, str]
+):
+    """Given a template file, generate an output file based on template dictionary."""
+    with open(template_filename) as template_fobj:
+        template_contents = template_fobj.read()
+
+    output_contents = template_contents
+    for to_find, to_replace in template.items():
+        output_contents = output_contents.replace(to_find, to_replace)
+
+    with open(output_filename, "w") as output_fobj:
+        output_fobj.write(output_contents)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "testlist_src_filename",
+        type=str,
+        help="Output source filename",
+    )
+    parser.add_argument(
+        "testlist_hdr_filename",
+        type=str,
+        help="Output header filename",
+    )
+    parser.add_argument("xml_test_filename", type=str, help="Input xml filename")
+    parser.add_argument(
+        "--plat-skip-file",
+        type=str,
+        help="Filename containing tests to skip for this platform",
+        dest="plat_skipped_list_filename",
+        required=False,
+    )
+    parser.add_argument(
+        "--arch-skip-file",
+        type=str,
+        help="Filename containing tests to skip for this architecture",
+        dest="arch_skipped_list_filename",
+        required=False,
+    )
+    args = parser.parse_args()
+
+    testsuites = parse_testsuites_from_file(args.xml_test_filename)
+
+    check_validity_of_names(testsuites)
+
+    if args.plat_skipped_list_filename:
+        testsuites = remove_skipped_tests(testsuites, args.plat_skipped_list_filename)
+
+    if args.arch_skipped_list_filename:
+        testsuites = remove_skipped_tests(testsuites, args.arch_skipped_list_filename)
+
+    # Flatten all testcases
+    combined_testcases = [tc for ts in testsuites for tc in ts.testcases]
+
+    # Generate header file
+    generate_file_from_template(
+        os.path.join(os.path.dirname(__file__), TESTS_LIST_H_TPL_FILENAME),
+        args.testlist_hdr_filename,
+        {TESTCASE_COUNT_TEMPLATE: str(len(combined_testcases))},
+    )
+
+    # Generate the source file
+    all_function_prototypes = generate_function_prototypes(combined_testcases)
+    testcase_lists_contents = generate_testcase_lists(testsuites)
+    testsuites_list_contents = generate_testsuite_lists(testsuites)
+
+    generate_file_from_template(
+        os.path.join(os.path.dirname(__file__), TESTS_LIST_C_TPL_FILENAME),
+        args.testlist_src_filename,
+        {
+            FUNCTION_PROTOTYPES_TEMPLATE: "\n".join(all_function_prototypes),
+            TESTCASE_LISTS_TEMPLATE: "\n".join(testcase_lists_contents),
+            TESTSUITES_LIST_TEMPLATE: "\n".join(testsuites_list_contents),
+        },
+    )
diff --git a/tools/generate_test_list/tests_list.c.tpl b/tools/generate_test_list/tests_list.c.tpl
new file mode 100644
index 0000000..115cb71
--- /dev/null
+++ b/tools/generate_test_list/tests_list.c.tpl
@@ -0,0 +1,7 @@
+#include "tftf.h"
+
+{{function_prototypes}}
+
+{{testcase_lists}}
+
+{{testsuites_list}}
diff --git a/tools/generate_test_list/tests_list.h.tpl b/tools/generate_test_list/tests_list.h.tpl
new file mode 100644
index 0000000..963b698
--- /dev/null
+++ b/tools/generate_test_list/tests_list.h.tpl
@@ -0,0 +1,6 @@
+#ifndef __TEST_LIST_H__
+#define __TEST_LIST_H__
+
+#define TESTCASE_RESULT_COUNT {{testcase_count}}
+
+#endif  // __TEST_LIST_H__