Add initial version of c-picker

Introduce the following features to c-picker:

* Picking of the elements from C source files:
  * Include directives
  * Functions
  * Variables
* Removing 'static' keyword from declarations
* Mapping coverage to the original source
* Documentation of the system

Signed-off-by: Imre Kis <imre.kis@arm.com>
Change-Id: Ia5cb90d3096b16b15aafb86363b8cabfe7d2ab72
diff --git a/c_picker/__init__.py b/c_picker/__init__.py
new file mode 100644
index 0000000..b606582
--- /dev/null
+++ b/c_picker/__init__.py
@@ -0,0 +1,12 @@
+#!/usr/bin/env python3
+# Copyright (c) 2019-2021, Arm Limited. All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""
+Package information.
+"""
+
+__all__ = ["picker", "runner", "coverage"]
+__license__ = "BSD-3-Clause"
+__version__ = "1.0.0"
diff --git a/c_picker/coverage.py b/c_picker/coverage.py
new file mode 100644
index 0000000..1c79db0
--- /dev/null
+++ b/c_picker/coverage.py
@@ -0,0 +1,189 @@
+#!/usr/bin/env python3
+# Copyright (c) 2020-2021, Arm Limited. All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""
+This module can map the coverage of c-picker generated files to the original source files.
+"""
+
+import argparse
+import json
+import os
+import re
+import sys
+
+class MappingDescriptor:
+    """ Storage class for file-line number data """
+
+    def __init__(self, file=None, line=None):
+        self.descriptor = {"file": file, "line": line}
+
+    def get_file(self):
+        """ Queries file name """
+        return self.descriptor["file"]
+
+    def get_line(self):
+        """ Queries line number """
+        return self.descriptor["line"]
+
+    @staticmethod
+    def serialize(mapping_descriptor):
+        """ Serializes the descriptor into a C comment containing JSON. """
+        return "/* C-PICKER-MAPPING " + json.dumps(mapping_descriptor.descriptor) + " */"
+
+    @staticmethod
+    def deserialize(line):
+        """
+        Deserializes the descriptor from a C comment containing JSON.
+        It returns None if the line is not matching the required pattern.
+        """
+        match = re.match(r"/\* C-PICKER-MAPPING ({.*}) \*/", line)
+        if not match:
+            return None
+
+        mapping_descriptor = MappingDescriptor()
+        mapping_descriptor.descriptor = json.loads(match.group(1))
+        return mapping_descriptor
+
+class CoverageMapper:
+    """ The class maps the coverage of the c-picker generated source files to the original ones. """
+
+    def __init__(self):
+        self.mapping_path = None
+        self.output = sys.stdout
+
+        self.test_name = None
+        self.mapping_enabled = False
+        self.mapping_descriptors = {}
+        self.mapped_source_file = None
+
+    def read_mapping_descriptors(self, filename):
+        """ Reads the mapping descriptors from the c-picker generated file. """
+        self.mapping_enabled = True
+
+        with open(filename, "r") as source_file:
+            source_lines = source_file.read().split("\n")
+            source_line_index = 1
+
+            for source_line in source_lines:
+                mapping_descriptor = MappingDescriptor.deserialize(source_line)
+
+                if mapping_descriptor:
+                    # +1: the elements start at the following line after the descriptor comment
+                    self.mapping_descriptors[source_line_index + 1] = mapping_descriptor
+                source_line_index += 1
+
+    def clear_mapping_descriptors(self):
+        """ Resets the mapping descriptor database. """
+        self.mapping_enabled = False
+        self.mapping_descriptors = {}
+        self.mapped_source_file = None
+
+    def find_mapping(self, line_number):
+        """ Find the mapping descriptor of a line number and also returns the mapped line. """
+        mapping_descriptor_index = 0
+        for i in self.mapping_descriptors:
+            if i > line_number:
+                break
+            mapping_descriptor_index = i
+
+        if not mapping_descriptor_index:
+            raise Exception("Invalid mapping for line %d" % line_number)
+
+        mapping_descriptor = self.mapping_descriptors[mapping_descriptor_index]
+        mapped_line = line_number - mapping_descriptor_index + mapping_descriptor.get_line()
+        return mapping_descriptor, mapped_line
+
+    def output_line(self, line):
+        """ Outputs a single line to the output """
+        self.output.write(line + "\n")
+
+    def process_line(self, trace_line):
+        """
+        The function processes a single line of the trace file and maintains the internal state of
+        the state machine.
+        """
+        if not trace_line:
+            return
+
+        if trace_line == "end_of_record":
+            # End of record, exit mapping mode
+            self.clear_mapping_descriptors()
+            self.output_line(trace_line)
+            return
+
+        command, params = trace_line.split(":", 1)
+        if command == "TN":
+            # Test name TN:<test name>
+            self.test_name = params
+        elif command == "SF" and params.startswith(self.mapping_path):
+            # Source file SF:<absolute path to the source file>
+            # Matching source file, switch into mapping mode
+            self.read_mapping_descriptors(params)
+            return
+
+        if self.mapping_enabled and (command in ("FN", "BRDA", "DA")):
+            # Function        FN:<line number of function start>,<function name>
+            # Branch coverage BRDA:<line number>,<block number>,<branch number>,<taken>
+            # Line coverage   DA:<line number>,<execution count>[,<checksum>]
+            line_number, remaining_params = params.split(",", 1)
+            mapping_descriptor, mapped_line_number = self.find_mapping(int(line_number))
+
+            if mapping_descriptor.get_file() != self.mapped_source_file:
+                # Change in the name of the mapped source file, starting new record
+                if self.mapped_source_file is not None:
+                    # No need for this part if it was the first mapped file
+                    # because TN has been already printed
+                    self.output_line("end_of_record")
+                    self.output_line("TN:%s" % self.test_name)
+                self.mapped_source_file = mapping_descriptor.get_file()
+                self.output_line("SF:%s" % self.mapped_source_file)
+
+            self.output_line("%s:%d,%s" % (command, mapped_line_number, remaining_params))
+            return
+
+        self.output_line(trace_line)
+
+    def main(self):
+        """ Runs the coverage mapper from the command line. Returns 0 on success, 1 on error. """
+
+        try:
+            parser = argparse.ArgumentParser(prog="c-picker-coverage-mapper", description=__doc__)
+            parser.add_argument("--input", help="Input trace file", required=True)
+            parser.add_argument("--output", help="Output file")
+            parser.add_argument("--mapping-path", help="Directory of generated files",
+                                required=True)
+            args = parser.parse_args()
+
+            # The mapped trace goes to the output file if it is given, otherwise to stdout
+            output_fp = open(args.output, "w") if args.output else None
+            self.output = output_fp if output_fp else sys.stdout
+            self.mapping_path = os.path.abspath(args.mapping_path)
+
+            with open(args.input, "r") as tracefile:
+                trace_lines = tracefile.read().split("\n")
+                for trace_line in trace_lines:
+                    self.process_line(trace_line)
+
+            if output_fp:
+                output_fp.close()
+
+            return 0
+        except FileNotFoundError as exception:
+            print("File not found: %s" % str(exception), file=sys.stderr)
+        except ValueError as exception:
+            print("Invalid format: %s" % str(exception), file=sys.stderr)
+        except Exception as exception: # pylint: disable=broad-except
+            print("Exception: %s" % exception, file=sys.stderr)
+
+        return 1
+
+def main():
+    """ Command line main function """
+    coverage_mapper = CoverageMapper()
+    result = coverage_mapper.main()
+    sys.exit(result)
+
+if __name__ == "__main__":
+    main()
diff --git a/c_picker/picker.py b/c_picker/picker.py
new file mode 100644
index 0000000..963a313
--- /dev/null
+++ b/c_picker/picker.py
@@ -0,0 +1,194 @@
+#!/usr/bin/env python3
+# Copyright (c) 2019-2021, Arm Limited. All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""
+This module can fetch elements (include directives, functions, etc) from C source codes.
+The main purpose of this module is to help unit testing by isolating functions from the
+rest of the code.
+"""
+
+import enum
+import os
+import sys
+
+# pylint exceptions are used because of the necessary Config.set_library_path call.
+from clang.cindex import Config
+if "CLANG_LIBRARY_PATH" in os.environ:
+    Config.set_library_path(os.environ["CLANG_LIBRARY_PATH"])
+import clang.cindex # pylint: disable=wrong-import-position
+
+from c_picker.coverage import MappingDescriptor # pylint: disable=wrong-import-position
+
+class CPicker:
+    """ CPicker can fetch C source element from a file matching the parameters and options. """
+
+    class Type(enum.Enum):
+        """ C source element type """
+        include = clang.cindex.CursorKind.INCLUSION_DIRECTIVE
+        function = clang.cindex.CursorKind.FUNCTION_DECL
+        variable = clang.cindex.CursorKind.VAR_DECL
+
+    class Option(enum.Enum):
+        """ Parameter for modifying the behaviour of the fetcher """
+
+        class RemoveStaticProcessor:
+            """ Removes 'static' from function declaration """
+
+            @staticmethod
+            def is_matching(node):
+                """ Checks if the storage class of the node is STATIC """
+                return node.storage_class == clang.cindex.StorageClass.STATIC
+
+            @staticmethod
+            def process(lines):
+                """ Removes 'static' before the function body """
+                processed_lines = []
+                function_body_started = False
+                for line in lines:
+                    if not function_body_started:
+                        processed_lines.append(line.replace("static ", ""))
+                        function_body_started = "{" in line
+                    else:
+                        processed_lines.append(line)
+                return processed_lines
+
+        remove_static = RemoveStaticProcessor
+
+    class ElementDescriptor:
+        """ Data structure of matching parameters """
+        def __init__(self, file_name, element_type, element_name=None):
+            self.file_name = file_name
+            self.element_type = element_type
+            self.element_name = element_name
+            self.args = None
+            self.options = None
+
+        def set_args(self, args):
+            """ Setting arguments of the parser """
+            self.args = args
+
+        def set_options(self, options):
+            """ Setting options for the fetcher """
+            self.options = options
+
+        def is_single_matching_type(self):
+            """ There's only a single matching element """
+            return bool(self.element_name)
+
+        def is_matching(self, node):
+            """ The node is matching the element defined in this instance """
+            if self.element_type == CPicker.Type.include:
+                return ((node.kind == self.element_type.value) and
+                        (str(node.location.file) == self.file_name))
+
+            if self.element_type == CPicker.Type.function:
+                return ((node.kind == self.element_type.value) and
+                        (str(node.location.file) == self.file_name) and
+                        (node.spelling == self.element_name) and
+                        node.is_definition())
+            if self.element_type == CPicker.Type.variable:
+                return ((node.kind == self.element_type.value) and
+                        (str(node.location.file) == self.file_name) and
+                        (node.spelling == self.element_name))
+            raise Exception("Invalid element type")
+
+        def get_option_processors(self, node):
+            """ Get processor function for matching options """
+            processors = []
+            for option in self.options:
+                if option.value.is_matching(node):
+                    processors.append(option.value)
+            return processors
+
+    def __init__(self, output=None, print_dependencies=False):
+        self.output = output if output else sys.stdout
+        self.print_dependencies = print_dependencies
+
+    def generate_header_comment(self):
+        """ Warning comment in the first line of the code """
+        self.output.write("/* DO NOT MODIFY! Generated by c-picker. */\n")
+
+    def generate_dependencies(self, elements):
+        """ Print the ';' separated, sorted list of files referenced by the elements list """
+        dependencies = set()
+
+        for element in elements:
+            dependencies.add(os.path.abspath(element.file_name))
+        # Sorting keeps the output reproducible, the iteration order of a set is arbitrary
+        self.output.write(";".join(sorted(dependencies)))
+
+    def fetch(self, element_descriptor):
+        """ Fetching C source element from the file """
+
+        # Parsing file with clang
+        try:
+            parser_options = clang.cindex.TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD
+            index = clang.cindex.Index.create()
+            translation_unit = index.parse(element_descriptor.file_name,
+                                           args=element_descriptor.args,
+                                           options=parser_options)
+        except clang.cindex.TranslationUnitLoadError as exception:
+            raise Exception("Failed to parse " + element_descriptor.file_name + ": "
+                            + str(exception))
+
+        # Element comment
+        if element_descriptor.element_name:
+            element_text = element_descriptor.element_name
+        else:
+            element_text = element_descriptor.element_type.name
+
+        self.output.write("\n/* %s from %s */\n" % (element_text, element_descriptor.file_name))
+
+        # Searching for matching elements
+        for node in translation_unit.cursor.walk_preorder():
+            if element_descriptor.is_matching(node):
+                self.dump(node, element_descriptor.get_option_processors(node))
+                if element_descriptor.is_single_matching_type():
+                    break
+        else:
+            if element_descriptor.is_single_matching_type():
+                raise Exception("%s not found in %s"
+                                % (element_descriptor.element_name, element_descriptor.file_name))
+
+    def dump(self, node, options_processors):
+        """ Dump the contents of a node to the specified output """
+        with open(str(node.location.file), "r") as source_file:
+            source_lines = source_file.readlines()
+            source_lines = source_lines[node.extent.start.line - 1 : node.extent.end.line]
+
+            if len(source_lines) == 1:
+                source_lines = [source_lines[0][node.extent.start.column - 1 :
+                                                node.extent.end.column - 1]]
+            elif len(source_lines) > 1:
+                source_lines = ([source_lines[0][node.extent.start.column - 1 :]] +
+                                source_lines[1:-1] +
+                                [source_lines[-1][: node.extent.end.column - 1]])
+
+            for processor in options_processors:
+                source_lines = processor.process(source_lines)
+
+            # Mapping information of the original source
+            mapping_descriptor = MappingDescriptor(str(node.location.file), node.extent.start.line)
+            self.output.write(MappingDescriptor.serialize(mapping_descriptor) + "\n")
+
+            for source_line in source_lines:
+                self.output.write(source_line)
+            if node.kind == CPicker.Type.variable.value:
+                self.output.write(";")
+            self.output.write("\n")
+
+    def process(self, elements):
+        """ Processes element list """
+        try:
+            if not self.print_dependencies:
+                self.generate_header_comment()
+
+                for element in elements:
+                    self.fetch(element)
+            else:
+                self.generate_dependencies(elements)
+        except clang.cindex.LibclangError as _:
+            raise Exception("Please ensure you have the correct version of libclang installed" +
+                            " and the CLANG_LIBRARY_PATH environment variable set.")
diff --git a/c_picker/runner.py b/c_picker/runner.py
new file mode 100644
index 0000000..b1d02a2
--- /dev/null
+++ b/c_picker/runner.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+# Copyright (c) 2020-2021, Arm Limited. All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""
+This module wraps the c-picker functionality into a command line interface.
+"""
+
+import argparse
+import json
+import sys
+import yaml
+
+from c_picker import __version__ as CPICKER_VERSION
+from c_picker.picker import CPicker
+from c_picker.picker import __doc__ as c_picker_doc
+
+class CPickerRunner:
+    """ Command line wrapper for CPicker """
+
+    def __init__(self):
+        self.root = ""
+        self.args = []
+        self.options = []
+
+        self.parser = argparse.ArgumentParser(prog="c-picker", description=c_picker_doc)
+
+        self.parser.add_argument("--root", help="Root source directory")
+        self.parser.add_argument("--config", help="Configuration file (.json|.yml)", required=True)
+        self.parser.add_argument("--output", help="Output file")
+        self.parser.add_argument("--print-dependencies", help="Print dependencies",
+                                 action="store_true")
+        self.parser.add_argument("--version", action="version", version=CPICKER_VERSION)
+        self.parser.add_argument("--args", help="clang arguments", nargs=argparse.REMAINDER)
+
+    @staticmethod
+    def error(message):
+        """ Print error message to stderr """
+        print("c-picker error\n" + message, file=sys.stderr)
+
+    def generate_element_descriptors(self, elements):
+        """ Converts the structure of the config file to a list of ElementDescriptors """
+        element_descriptors = []
+
+        for element in elements:
+            file_name = self.root + element["file"]
+            element_name = element["name"] if "name" in element else None
+            element_type = CPicker.Type[element["type"]]
+            element_args = self.args + element.get("args", [])
+            element_options = [
+                CPicker.Option[opt]
+                for opt in self.options + element.get("options", [])]
+
+            descriptor = CPicker.ElementDescriptor(file_name, element_type, element_name)
+            descriptor.set_args(element_args)
+            descriptor.set_options(element_options)
+
+            element_descriptors.append(descriptor)
+
+        return element_descriptors
+
+    @staticmethod
+    def create_config(args):
+        """ Create a configuration object from the command line arguments and config files. """
+        try:
+            if args.config.endswith(".json"):
+                with open(args.config) as json_file:
+                    return json.load(json_file)
+            elif args.config.endswith(".yml"):
+                with open(args.config) as yml_file:
+                    return yaml.safe_load(yml_file)
+            else:
+                raise Exception("Invalid configuration file %s" % args.config)
+        except json.decoder.JSONDecodeError as exception:
+            raise Exception("Invalid JSON format: " + str(exception))
+        except yaml.YAMLError as exception:
+            raise Exception("Invalid YAML format: " + str(exception))
+        except FileNotFoundError as exception:
+            raise Exception("File not found: " + str(exception))
+
+    def main(self):
+        """ Runs CPicker from the command line. Returns 0 on success, 1 on error. """
+        try:
+            args = self.parser.parse_args()
+            config = self.create_config(args)
+
+            output_fp = open(args.output, "w") if args.output else None
+            self.root = args.root + "/" if args.root else ""
+
+            # Command line clang arguments come first, the config file ones are appended
+            self.args = (args.args if args.args else []) + config.get("args", [])
+            self.options = config.get("options", [])
+            elements = self.generate_element_descriptors(config.get("elements", []))
+
+            c_picker = CPicker(output_fp, args.print_dependencies)
+            c_picker.process(elements)
+
+            if output_fp:
+                output_fp.close()
+
+            return 0
+        except KeyError as exception:
+            self.error("Key error: " + str(exception))
+        except Exception as exception: # pylint: disable=broad-except
+            self.error("Error: " + str(exception))
+
+        return 1
+
+def main():
+    """ Command line main function """
+    c_picker_runner = CPickerRunner()
+    result = c_picker_runner.main()
+    sys.exit(result)
+
+if __name__ == '__main__':
+    main()