scripts: add symbolize.py

Add a helper script to decode call stacks shown in abort messages. The
script relies on addr2line to convert virtual addresses to debug
information: 'function at file:line'.

Signed-off-by: Jerome Forissier <jerome.forissier@linaro.org>
Acked-by: Jens Wiklander <jens.wiklander@linaro.org>
Reviewed-by: Joakim Bech <joakim.bech@linaro.org>
diff --git a/scripts/symbolize.py b/scripts/symbolize.py
new file mode 100755
index 0000000..43f378d
--- /dev/null
+++ b/scripts/symbolize.py
@@ -0,0 +1,192 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2017, Linaro Limited
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+import argparse
+import glob
+import re
+import subprocess
+import sys
+
+TA_UUID_RE = re.compile(r'Status of TA (?P<uuid>[0-9a-f\-]+)')
+TA_INFO_RE = re.compile(r':  arch: (?P<arch>\w+)  '
+                        r'load address: (?P<load_addr>0x[0-9a-f]+)')
+CALL_STACK_RE = re.compile(r'Call stack:')  # precedes the address lines
+STACK_ADDR_RE = re.compile(r':  (?P<addr>0x[0-9a-f]+)')  # one stack entry
+X64_REGS_RE = re.compile(r':  x0  [0-9a-f]{16} x1  [0-9a-f]{16}')
+
+epilog = '''
+This script reads an OP-TEE abort message from stdin and adds debug
+information ('function at file:line') next to each address in the call stack.
+It uses the paths provided on the command line to locate the appropriate ELF
+binary (tee.elf or Trusted Application) and runs arm-linux-gnueabihf-addr2line
+or aarch64-linux-gnu-addr2line to process the addresses.
+
+OP-TEE abort messages are sent to the secure console. They look like the
+following:
+
+  ERROR:   TEE-CORE: User TA data-abort at address 0xffffdecd (alignment fault)
+  ...
+  ERROR:   TEE-CORE: Call stack:
+  ERROR:   TEE-CORE:  0x4000549e
+  ERROR:   TEE-CORE:  0x40001f4b
+  ERROR:   TEE-CORE:  0x4000273f
+  ERROR:   TEE-CORE:  0x40005da7
+
+Inspired by a script of the same name by the Chromium project.
+
+Sample usage:
+
+  $ scripts/symbolize.py -d out/arm-plat-hikey/core -d ../optee_test/out/ta/*
+  <paste whole dump here>
+  ^D
+'''
+
+def get_args():
+    """Parse the command line and return the argparse namespace."""
+    dir_help = ('Search for ELF file in DIR. tee.elf is needed to decode '
+                'a TEE Core or pseudo-TA abort, while <TA_uuid>.elf is '
+                'required if a user-mode TA has crashed.')
+    ap = argparse.ArgumentParser(
+        description='Symbolizes OP-TEE abort dumps', epilog=epilog,
+        formatter_class=argparse.RawDescriptionHelpFormatter)
+    ap.add_argument('-d', '--dir', action='append', nargs='+',
+                    help=dir_help)
+    ap.add_argument('-s', '--strip_path',
+                    help='Strip STRIP_PATH from file paths')
+    return ap.parse_args()
+
+class Symbolizer(object):
+    """Copy abort dump lines to `out`, symbolizing call stack addresses."""
+
+    def __init__(self, out, dirs, strip_path):
+        self._out = out                # file-like object written to
+        self._dirs = dirs              # directories searched for ELF files
+        self._strip_path = strip_path  # path prefix to drop, or None
+        self._addr2line = None         # addr2line child process, if any
+        self.reset()
+
+    def get_elf(self, elf_or_uuid):
+        """Return the first matching ELF path found in the dirs, or None."""
+        if not elf_or_uuid.endswith('.elf'):
+            elf_or_uuid += '.elf'
+        for d in self._dirs:
+            elf = glob.glob(d + '/' + elf_or_uuid)
+            if elf:
+                return elf[0]
+        return None
+
+    def spawn_addr2line(self):
+        """Start addr2line for the current ELF/arch if not yet running."""
+        if self._addr2line:
+            return
+        cmd = {'arm': 'arm-linux-gnueabihf-addr2line',
+               'aarch64': 'aarch64-linux-gnu-addr2line'}.get(self._arch)
+        elf = self.get_elf(self._bin)
+        if not cmd or not elf:
+            return
+        # Text-mode pipes: str in and out on both Python 2 and Python 3
+        self._addr2line = subprocess.Popen(
+            [cmd, '-f', '-p', '-e', elf], universal_newlines=True,
+            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+
+    def resolve(self, addr):
+        """Symbolize addr; '???' if it cannot be, '!!!' on I/O error."""
+        offs = int(self._load_addr, 0)
+        if offs > int(addr, 0):
+            return '???'
+        self.spawn_addr2line()
+        if not self._addr2line:
+            return '???'
+        try:
+            self._addr2line.stdin.write('0x%x\n' % (int(addr, 0) - offs))
+            self._addr2line.stdin.flush()  # the pipe may be buffered
+            return self._addr2line.stdout.readline().rstrip('\n')
+        except IOError:
+            return '!!!'
+
+    def reset(self):
+        """Drop per-dump state and stop any running addr2line process."""
+        self._call_stack_found = False
+        self._load_addr = '0'
+        self._arch = 'arm'
+        # Default back to the TEE core so a previous TA's ELF is not reused
+        self._bin = 'tee.elf'
+        if self._addr2line:
+            self._addr2line.terminate()
+            self._addr2line.wait()  # reap the child, avoid a zombie
+            self._addr2line = None
+
+    def write(self, line):
+        """Output line, annotated when it is a call stack entry."""
+        if self._call_stack_found:
+            match = STACK_ADDR_RE.search(line)
+            if match:
+                post = match.end('addr')
+                self._out.write(line[:post])
+                res = self.resolve(match.group('addr'))
+                if self._strip_path:
+                    res = re.sub(re.escape(self._strip_path) + '/*', '', res)
+                self._out.write(' ' + res + line[post:])
+                return
+            # Any other line ends the call stack: start afresh
+            self.reset()
+        if CALL_STACK_RE.search(line):
+            self._call_stack_found = True
+        match = TA_UUID_RE.search(line)
+        if match:
+            self._bin = match.group('uuid')
+        match = TA_INFO_RE.search(line)
+        if match:
+            self._arch = match.group('arch')
+            self._load_addr = match.group('load_addr')
+        if X64_REGS_RE.search(line):
+            # Assume the TEE core arch; a TA dump overwrites _arch later
+            self._arch = 'aarch64'
+        self._out.write(line)
+
+    def flush(self):
+        """Flush the underlying output stream."""
+        self._out.flush()
+
+def main():
+    """Symbolize the abort dump read from stdin, writing to stdout."""
+    args = get_args()
+    # -d may be repeated *and* take several values each time, so args.dir
+    # is a list of lists (or None when the option is absent): flatten it
+    dirs = []
+    for group in args.dir or []:
+        dirs.extend(group)
+    symbolizer = Symbolizer(sys.stdout, dirs, args.strip_path)
+
+    for line in sys.stdin:
+        symbolizer.write(line)
+    symbolizer.flush()
+
+if __name__ == "__main__":  # only when run as a script, not on import
+    main()