scripts/symbolize.py: try to resolve abort address to symbol/section[+offset]

Use nm and objdump to find the symbol and ELF section that match the
address reported in the abort line. This can help debug writes to
read-only data or unaligned accesses to global data, for example.

If the address can be resolved to a symbol plus some offset and/or a
section plus some offset, the abort line is printed again just before
the call stack with the symbolic information added. Note that the
translation cannot be done immediately when the abort line is seen
because at this point we don't know the architecture, and we don't have
the load address of the TA.

Here is an example (the line added by this patch is marked with >>):

   User TA data-abort at address 0x1314d0 (write permission fault)
    fsr 0x0000080f  ttbr0 0x0e07a06a  ttbr1 0x0e07406a  cidr 0x1
    cpu #0          cpsr 0x60000030
    r0 0x00000001      r4 0x00102780    r8 0x00000000   r12 0xb736e358
    r1 0x00102724      r5 0x00121e4f    r9 0x00000000    sp 0x001026e0
    r2 0x00000001      r6 0x001026dc   r10 0x00000000    lr 0x00105cf1
    r3 0x001314d0      r7 0x001026e0   r11 0x00000000    pc 0x00105790
   Status of TA 5b9e0e40-2636-11e1-ad9e-0002a5d5c51b (0xe073b70) (active)
    arch: arm  load address: 0x103000  ctx-idr: 1
    stack: 0x100000 10240
    region 0: va 0x100000 pa 0xe21e000 size 0x3000
    region 1: va 0x103000 pa 0xe100000 size 0x2e000
    region 2: va 0x131000 pa 0xe12e000 size 0xa000
    region 3: va 0x13b000 pa 0xe138000 size 0xe6000
    region 4: va 0 pa 0 size 0
    region 5: va 0 pa 0 size 0
    region 6: va 0 pa 0 size 0
    region 7: va 0 pa 0 size 0
>> User TA data-abort at address 0x1314d0 const_val+4 .rodata+4452 (write permission fault)
   Call stack:
    0x00105790 ta_entry_bad_mem_access at optee_test/ta/os_test/os_test.c:917
    0x00105cf1 TA_InvokeCommandEntryPoint at optee_test/ta/os_test/ta_entry.c:101
    0x00121e33 entry_invoke_command at optee_os/lib/libutee/arch/arm/user_ta_entry.c:207
    0x00121e8f __utee_entry at optee_os/lib/libutee/arch/arm/user_ta_entry.c:235

The test TA does the following:

  const int const_val[3] = { 1, };
  /* ... */
  ((int *)const_val)[1] = 2;

Suggested-by: Zeng Tao <prime.zeng@hisilicon.com>
Signed-off-by: Jerome Forissier <jerome.forissier@linaro.org>
Reviewed-by: Joakim Bech <joakim.bech@linaro.org>
Reviewed-by: Igor Opaniuk <igor.opaniuk@linaro.org>
diff --git a/scripts/symbolize.py b/scripts/symbolize.py
index 8a69117..6965629 100755
--- a/scripts/symbolize.py
+++ b/scripts/symbolize.py
@@ -39,6 +39,7 @@
 CALL_STACK_RE = re.compile('Call stack:')
 STACK_ADDR_RE = re.compile(r':  (?P<addr>0x[0-9a-f]+)')
 X64_REGS_RE = re.compile(':  x0  [0-9a-f]{16} x1  [0-9a-f]{16}')
+ABORT_ADDR_RE = re.compile('-abort at address (?P<addr>0x[0-9a-f]+)')
 
 epilog = '''
 This scripts reads an OP-TEE abort message from stdin and adds debug
@@ -98,28 +99,36 @@
             if elf:
                 return elf[0]
 
+    def arch_prefix(self, cmd):
+        if self._arch == 'arm':
+            return 'arm-linux-gnueabihf-' + cmd
+        elif self._arch == 'aarch64':
+            return 'aarch64-linux-gnu-' + cmd
+        else:
+            return ''
+
     def spawn_addr2line(self):
         if not self._addr2line:
             elf = self.get_elf(self._bin)
             if not elf:
                 return
-            if self._arch == 'arm':
-                cmd = 'arm-linux-gnueabihf-addr2line'
-            elif self._arch == 'aarch64':
-                cmd = 'aarch64-linux-gnu-addr2line'
-            else:
+            cmd = self.arch_prefix('addr2line')
+            if not cmd:
                 return
             self._addr2line = subprocess.Popen([cmd, '-f', '-p', '-e', elf],
                                                 stdin = subprocess.PIPE,
                                                 stdout = subprocess.PIPE)
 
-    def resolve(self, addr):
+    def subtract_load_addr(self, addr):
         offs = self._load_addr
         if int(offs, 16) > int(addr, 16):
-            return '???'
-        reladdr = '0x{:x}'.format(int(addr, 16) - int(offs, 16))
+            return ''
+        return '0x{:x}'.format(int(addr, 16) - int(offs, 16))
+
+    def resolve(self, addr):
+        reladdr = self.subtract_load_addr(addr)
         self.spawn_addr2line()
-        if not self._addr2line:
+        if not reladdr or not self._addr2line:
             return '???'
         try:
             print >> self._addr2line.stdin, reladdr
@@ -128,6 +137,89 @@
             ret = '!!!'
         return ret
 
+    def symbol_plus_offset(self, addr):
+        ret = ''
+        prevsize = 0
+        reladdr = self.subtract_load_addr(addr)
+        elf = self.get_elf(self._bin)
+        cmd = self.arch_prefix('nm')
+        if not reladdr or not elf or not cmd:
+            return ''
+        ireladdr = int(reladdr, 0)
+        nm = subprocess.Popen([cmd, '--numeric-sort', '--print-size', elf],
+                               stdin = subprocess.PIPE,
+                               stdout = subprocess.PIPE)
+        for line in iter(nm.stdout.readline, ''):
+            try:
+                addr, size, _, name = line.split()
+            except:
+                # Size is missing
+                addr, _, name = line.split()
+                size = '0'
+            iaddr = int(addr, 16)
+            isize = int(size, 16)
+            if iaddr == ireladdr:
+                ret = name
+                break
+            if iaddr < ireladdr and iaddr + isize >= ireladdr:
+                offs = ireladdr - iaddr
+                ret = name + '+' + str(offs)
+                break
+            if iaddr > ireladdr and prevsize == 0:
+                offs = iaddr + ireladdr
+                ret = prevname + '+' + str(offs)
+                break
+            prevsize = size
+            prevname = name
+        nm.terminate()
+        return ret
+
+    def section_plus_offset(self, addr):
+        ret = ''
+        reladdr = self.subtract_load_addr(addr)
+        elf = self.get_elf(self._bin)
+        cmd = self.arch_prefix('objdump')
+        if not reladdr or not elf or not cmd:
+            return ''
+        iaddr = int(reladdr, 0)
+        objdump = subprocess.Popen([cmd, '--section-headers', elf],
+                                    stdin = subprocess.PIPE,
+                                    stdout = subprocess.PIPE)
+        for line in iter(objdump.stdout.readline, ''):
+            try:
+                idx, name, size, vma, lma, offs, algn = line.split()
+            except:
+                continue;
+            ivma = int(vma, 16)
+            isize = int(size, 16)
+            if ivma == iaddr:
+                ret = name
+                break
+            if ivma < iaddr and ivma + isize >= iaddr:
+                offs = iaddr - ivma
+                ret = name + '+' + str(offs)
+                break
+        objdump.terminate()
+        return ret
+
+    def process_abort(self, line):
+        ret = ''
+        match = re.search(ABORT_ADDR_RE, line)
+        addr = match.group('addr')
+        pre = match.start('addr')
+        post = match.end('addr')
+        sym = self.symbol_plus_offset(addr)
+        sec = self.section_plus_offset(addr)
+        if sym or sec:
+            ret += line[:pre]
+            ret += addr
+            if sym:
+                ret += ' ' + sym
+            if sec:
+                ret += ' ' + sec
+            ret += line[post:]
+        return ret
+
     def reset(self):
         self._call_stack_found = False
         self._load_addr = '0'
@@ -135,6 +227,7 @@
             self._addr2line.terminate()
             self._addr2line = None
         self._arch = 'arm'
+        self._saved_abort_line = ''
 
     def write(self, line):
             if self._call_stack_found:
@@ -157,6 +250,10 @@
             match = re.search(CALL_STACK_RE, line)
             if match:
                 self._call_stack_found = True
+                # Here is a good place to resolve the abort address because we
+                # have all the information we need
+                if self._saved_abort_line:
+                    self._out.write(self.process_abort(self._saved_abort_line))
             match = re.search(TA_UUID_RE, line)
             if match:
                 self._bin = match.group('uuid')
@@ -169,6 +266,11 @@
                 # Assume _arch represents the TEE core. If we have a TA dump,
                 # it will be overwritten later
                 self._arch = 'aarch64'
+            match = re.search(ABORT_ADDR_RE, line)
+            if match:
+                # At this point the arch and TA load address are unknown.
+                # Save the line so we can translate the abort address later.
+                self._saved_abort_line = line
             self._out.write(line)
 
     def flush(self):