# !/usr/bin/env python
###############################################################################
# Copyright (c) 2020-2022, ARM Limited and Contributors. All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
###############################################################################

###############################################################################
# FILE: intermediate_layer.py
#
# DESCRIPTION: Creates an intermediate json file with information provided
#              by the configuration json file, dwarf signatures and trace
#              files.
#
###############################################################################
16
17import os
18import re
19import glob
20import argparse
21import subprocess
22import json
23from argparse import RawTextHelpFormatter
24import logging
25import time
26
__version__ = "6.0"

# Static map from elf file base name to the numeric source type stored in
# the intermediate json. Elf files whose name is not listed here are
# numbered starting from "custom_offset".
ELF_MAP = {
    # Boot loader stages
    "bl1": 0,
    "bl2": 1,
    "bl31": 2,
    "bl32": 3,
    # SCP / MCP firmware images
    "scp_ram": 10,
    "scp_rom": 11,
    "mcp_rom": 12,
    "mcp_ram": 13,
    # First index handed out to elf files not listed above
    "custom_offset": 100,
}
41
42
def os_command(command, show_command=False):
    """
    Function that executes an os command, raising an exception on failure

    The command is run through the shell (callers pass pipelines) and
    stderr is folded into the captured output.

    :param command: OS command as string
    :param show_command: Optional argument to print the command in stdout
    :return: The string output (stdout and stderr) of the os command
    :raises Exception: When the command exits with a non-zero status; the
    message carries the command, its output and its return code
    """
    if show_command:
        print("OS command: {}".format(command))
    try:
        out = subprocess.check_output(
            command, stderr=subprocess.STDOUT, shell=True)
    except subprocess.CalledProcessError as ex:
        # Decode the captured output so the message shows readable text
        # rather than a bytes repr (b'...').
        output = ex.output.decode("utf8", errors="replace")
        raise Exception(
            "Exception running command '{}': {}({})".format(
                command, output, ex.returncode)) from ex
    return out.decode("utf8")
62
63
def load_stats_from_traces(trace_globs):
    """
    Function to process and consolidate statistics from trace files

    Each trace line is expected to hold three whitespace separated fields:
    address (hexadecimal), times executed (decimal) and instruction size
    (decimal). Blank lines are skipped; malformed lines are logged and
    skipped so that the remaining lines of the file are still counted.

    :param trace_globs: List of trace file patterns
    :return: Dictionary with stats from trace files i.e.
    {mem address in decimal}=(times executed, inst size)
    :raises Exception: When no file matches any of the patterns
    """
    # Looked up here (instead of using the module-level 'logger') because
    # that global is only created when the file runs as a script.
    log = logging.getLogger(__name__)

    # Unique set of trace files matching the patterns
    trace_files = set()
    for tg in trace_globs:
        trace_files.update(glob.glob(tg))
    if not trace_files:
        raise Exception("No trace files found for '{}'".format(trace_globs))

    hits = {}
    sizes = {}
    for trace_file in trace_files:
        try:
            with open(trace_file, 'r') as f:
                for line_number, line in enumerate(f, start=1):
                    data = line.split()
                    if not data:
                        continue
                    try:
                        address = int(data[0], 16)
                        stat = int(data[1])
                        size = int(data[2])
                    except (IndexError, ValueError) as ex:
                        log.error(
                            "@Loading stats from trace files:{} "
                            "({}:{})".format(ex, trace_file, line_number))
                        continue
                    # The last size seen for an address wins; hit counts
                    # accumulate across lines and files.
                    sizes[address] = size
                    hits[address] = hits.get(address, 0) + stat
        except (OSError, UnicodeDecodeError) as ex:
            # Best effort: an unreadable file must not stop the others
            log.error("@Loading stats from trace files:{}".format(ex))
    return {address: (count, sizes[address])
            for address, count in hits.items()}
103
104
def get_code_sections_for_binary(elf_name):
    """
    Function to return the ranges of memory address for sections of code
    in the elf file

    The sections are taken from the `objdump -h` listing, keeping the
    entries whose flags line contains CODE.

    :param elf_name: Elf binary file name
    :return: List of code section tuples, i.e. (section name, start
    address, size) taken from the Name, VMA and Size columns
    """
    # Looked up here (instead of using the module-level 'logger') because
    # that global is only created when the file runs as a script.
    log = logging.getLogger(__name__)
    command = ("%s -h %s | grep -B 1 CODE | grep -v CODE "
               """| awk '{print $2" "$4" "$3}'""") % (OBJDUMP, elf_name)
    secs = []
    for sec in os_command(command).splitlines():
        d = sec.split()
        if not d:
            # 'grep -B' emits '--' group separators, which awk turns into
            # lines with no fields; they are expected, not errors.
            continue
        try:
            secs.append((d[0], int(d[1], 16), int(d[2], 16)))
        except (IndexError, ValueError) as ex:
            log.error(
                "@Returning memory address code sections:{}".format(ex))
    return secs
128
129
def get_executable_ranges_for_binary(elf_name):
    """
    Get function ranges from an elf file

    The ranges are built from the $x/$a/$t (code) and $d (data) symbols
    listed by `readelf -s`, plus one marker at the end of every code
    section so that the last code range of a section gets closed.

    :param elf_name: Elf binary file name
    :return: List of tuples for ranges i.e. (range start, range end), both
    inclusive; empty when no symbol or section marker is found
    """
    # Looked up here (instead of using the module-level 'logger') because
    # that global is only created when the file runs as a script.
    log = logging.getLogger(__name__)

    # Parse all $x / $d symbols
    symbol_table = []
    command = r"""%s -s %s | awk '/\$[xatd]/ {print $2" "$8}'""" % (
        READELF, elf_name)
    for line in os_command(command).splitlines():
        data = line.split()
        try:
            address = int(data[0], 16)
            _type = 'X' if data[1] in ('$x', '$t', '$a') else 'D'
        except (IndexError, ValueError) as ex:
            log.error("@Getting executable ranges:{}".format(ex))
            # Skip the line: there is no valid (address, type) to record
            continue
        symbol_table.append((address, _type))

    # Add markers for end of code sections (start address + size)
    for sec in get_code_sections_for_binary(elf_name):
        symbol_table.append((sec[1] + sec[2], 'S'))

    if not symbol_table:
        return []

    # Sort by address
    symbol_table.sort(key=lambda tup: tup[0])

    # Create ranges (list of START/END tuples)
    ranges = []
    range_start, rtype = symbol_table[0]
    for address, _type in symbol_table:
        if _type == rtype:
            continue
        if rtype == 'X':
            # Subtract one because the first address of the
            # next range belongs to the next range.
            ranges.append((range_start, address - 1))
        range_start = address
        rtype = _type
    return ranges
174
175
def list_of_functions_for_binary(elf_name):
    """
    Collect the function symbols of an elf file from `objdump -t`

    Only symbols whose flags field contains 'F' are kept. A leading
    '.hidden ' marker is stripped from the name. When a name shows up more
    than once the first entry is kept and a warning is logged.

    :param elf_name: Elf binary file name
    :return: Dictionary addressed by function name; each value holds
    'start' and 'end' (the first and second hexadecimal fields of the
    symbol line, kept as strings) and 'sources' (initialised to False)
    """
    hidden_marker = '.hidden '
    symbol_re = re.compile(
        r'([0-9a-fA-F]+) (.{7}) ([^ ]+)[ \t]([0-9a-fA-F]+) (.*)')
    command = "%s -t %s | awk 'NR>4' | sed /^$/d" % (OBJDUMP, elf_name)
    listing = os_command(command).split('\n')[:-1]
    found = {}
    for entry in listing:
        try:
            # Indexing an empty result raises, which reports the line
            # through the error log below.
            start, flags, _section, end, name = symbol_re.findall(entry)[0]
            if 'F' not in flags:
                continue
            # We don't want the .hidden for hidden functions
            if name.startswith(hidden_marker):
                name = name[len(hidden_marker):]
            if name in found:
                logger.warning("'{}' duplicated in '{}'".format(
                    name,
                    elf_name))
            else:
                found[name] = {'start': start,
                               'end': end,
                               'sources': False}
        except Exception as ex:
            logger.error("@Listing functions at file {}: {}".format(
                elf_name,
                ex))
    return found
212
213
def apply_functions_exclude(elf_config, functions):
    """
    Remove excluded functions from the list of functions

    A function is excluded when its name matches, from the start of the
    name (re.match), any regular expression listed under
    'exclude_functions' in the elf configuration.

    :param elf_config: Config for elf binary file
    :param functions: Dictionary of functions in the binary elf file,
    addressed by function name
    :return: Tuple of two dictionaries: (included functions, excluded
    functions). Without an 'exclude_functions' entry the input dictionary
    itself is returned as the included set.
    """
    if 'exclude_functions' not in elf_config:
        # An empty dict (not a list) keeps the return type consistent
        return functions, {}
    # Compile once instead of once per function
    patterns = [re.compile(rex) for rex in elf_config['exclude_functions']]
    incl = {}
    excl = {}
    for fname, details in functions.items():
        if any(pattern.match(fname) for pattern in patterns):
            excl[fname] = details
        else:
            incl[fname] = details
    return incl, excl
236
237
def remove_workspace(path, workspace):
    """
    Express a path relative to a given workspace

    :param path: Path to be converted
    :param workspace: Folder the returned path is made relative to; when
    None the path is returned unchanged
    :return: The path relative to the workspace, or the original path
    """
    if workspace is None:
        return path
    return os.path.relpath(path, workspace)
248
249
def get_function_line_numbers(source_file):
    """
    Using ctags get all the function names with their line numbers
    within the source_file

    Failures are logged as warnings and never raised: when the ctags
    command fails an empty dictionary is returned, and entries whose line
    number cannot be parsed are left out of the result.

    :param source_file: Source file to be scanned by ctags
    :return: Dictionary with function name as key and line number as value
    """
    # Looked up here (instead of using the module-level 'logger') because
    # that global is only created when the file runs as a script.
    log = logging.getLogger(__name__)
    command = "ctags -x --c-kinds=f {}".format(source_file)
    try:
        function_lines = os_command(command).split("\n")
    except Exception as ex:
        log.warning(f"Warning: Unknown error '{ex}' when executing command '{command}'")
        return {}

    fln = {}
    incomplete = False
    for line in function_lines:
        cols = line.split()
        if len(cols) < 3:
            continue
        try:
            if cols[1] == "function":
                fln[cols[0]] = int(cols[2])
            elif cols[1] == "label" and cols[0] == "func":
                # The function name is taken from the last column
                fln[cols[-1]] = int(cols[2])
        except ValueError:
            incomplete = True
    if incomplete:
        log.warning("Warning: Can't get all function line numbers from %s" %
                    source_file)
    return fln
277
278
class FunctionLineNumbers(object):
    """
    Per-file cache of the function line numbers returned by
    get_function_line_numbers
    """

    def __init__(self, workspace):
        """
        :param workspace: Folder the file names are joined to before they
        are scanned
        """
        self.workspace = workspace
        # {file name as given by the caller} => {function name: line number}
        self.filenames = {}

    def get_line_number(self, filename, function_name):
        """
        Return the line number of a function within a source file

        :param filename: Source file name, relative to the workspace
        :param function_name: Name of the function to look up
        :return: Line number, or 0 when function line numbers are disabled
        or the function is not found in the file
        """
        if not FUNCTION_LINES_ENABLED:
            return 0
        known = self.filenames.get(filename)
        if known is None:
            # First request for this file: scan it once and keep the result
            known = get_function_line_numbers(
                os.path.join(self.workspace, filename))
            self.filenames[filename] = known
        return known.get(function_name, 0)
293
294
class PostProcessCC(object):
    """Class used to process the trace data along with the dwarf
    signature files to produce an intermediate layer in json with
    code coverage in assembly and c source code.
    """

    def __init__(self, _config, local_workspace):
        """
        :param _config: Configuration dictionary; 'elfs', 'parameters' and
        'configuration' entries are read from it
        :param local_workspace: Local workspace folder with the source code
        """
        self._data = {}
        self.config = _config
        self.local_workspace = local_workspace
        self.elfs = self.config['elfs']
        # Dictionary with stats from trace files {address}=(times executed,
        # inst size)
        self.traces_stats = {}
        # Dictionary of unique assembly line memory address against source
        # file location
        # {assembly address} = (opcode, source file location, line number in
        # the source file, times executed)
        self.asm_lines = {}
        # Dictionary of {source file location}=>{'lines': {'covered':Boolean,
        # 'elf_index'; {elf index}=>{assembly address}=>(opcode,
        # times executed),
        # 'functions': {function name}=>is covered(boolean)}
        self.source_files_coverage = {}
        self.functions = []
        # Unique set of elf list of files
        self.elf_map = {}
        # For elf custom mappings
        self.elf_custom = None

    def process(self):
        """
        Public method to process the trace files and dwarf signatures
        using the information contained in the json configuration file.
        This method writes the intermediate json file output linking
        the trace data and c source and assembly code.
        """
        self.source_files_coverage = {}
        self.asm_lines = {}
        # Initialize for unknown elf files
        self.elf_custom = ELF_MAP["custom_offset"]
        sources_config = {}
        parameters = self.config['parameters']
        print("Generating intermediate json layer '{}'...".format(
            parameters['output_file']))
        for elf in self.elfs:
            # Gather information
            elf_name = elf['name']
            # Raises when the elf file cannot be listed
            os_command("ls {}".format(elf_name))
            # Trace data
            self.traces_stats = load_stats_from_traces(elf['traces'])
            prefix = parameters['workspace'] \
                if self.config['configuration']['remove_workspace'] else \
                None
            functions_list = list_of_functions_for_binary(elf_name)
            functions_list, _excluded = apply_functions_exclude(
                elf, functions_list)
            # Produce code coverage
            self.dump_sources(elf_name, functions_list, prefix)
            sources_config = parameters['sources']
            # Now check code coverage in the functions with no dwarf signature
            # (sources)
            nf = {f: functions_list[f] for f in
                  functions_list if not
                  functions_list[f]["sources"]}
            self.process_fn_no_sources(nf)
        # Write to the intermediate json file
        data = {"source_files": self.source_files_coverage,
                "configuration": {
                    "sources": sources_config,
                    "metadata": parameters.get('metadata', ""),
                    "elf_map": self.elf_map
                }
                }
        json_data = json.dumps(data, indent=4, sort_keys=True)
        with open(parameters['output_file'], "w") as f:
            f.write(json_data)

    def dump_sources(self, elf_filename, function_list, prefix=None):
        """
        Process an elf file i.e. match the source and asm lines against trace
        files (coverage).

        :param elf_filename: Elf binary file name
        :param function_list: Dictionary of functions in the elf file,
        addressed by function name; the 'sources' flag of every function
        found with source information is set to True
        :param prefix: Optional path name to be removed at the start of source
        file locations
        """
        command = "%s -Sl %s" % (OBJDUMP, elf_filename)
        dump = os_command(command)
        dump += "\n0 <null>:"  # For pattern matching the last function
        elf_name = os.path.splitext(os.path.basename(elf_filename))[0]
        # Object that handles the function line numbers in
        # their filename
        function_line_numbers = FunctionLineNumbers(self.local_workspace)
        # To map the elf filename against an index
        if elf_name not in self.elf_map:
            if elf_name in ELF_MAP:
                self.elf_map[elf_name] = ELF_MAP[elf_name]
            else:
                self.elf_map[elf_name] = self.elf_custom
                self.elf_custom += 1
        elf_index = self.elf_map[elf_name]
        # The function groups have 2 elements:
        # Function's block name, Function's block code
        function_groups = re.findall(
            r"(?s)[0-9a-fA-F]+ <([a-zA-Z0-9_]+)>:\n(.+?)(?=[A-Fa-f0-9]* <[a-zA-Z0-9_]+>:)",
            dump, re.DOTALL | re.MULTILINE)
        # Pointer to files dictionary
        source_files = self.source_files_coverage
        for function_group in function_groups:
            if len(function_group) != 2:
                continue
            block_function_name, block_code = function_group
            block_code += "\n"
            # Find if the function has C source code filename
            function_signature_group = re.findall(
                r"(?s){}\(\):\n(/.+?):[0-9]+.*(?:\r*\n\n|\n$)".format(
                    block_function_name), block_code, re.DOTALL | re.MULTILINE)
            if not function_signature_group:
                continue  # Function does not have dwarf signature (sources)
            if block_function_name not in function_list:
                print("Warning:Function '{}' not found in function list!!!".format(block_function_name))
                continue  # Function not found in function list
            function_list[block_function_name]["sources"] = True
            block_function_source_file = remove_workspace(
                function_signature_group[0], prefix)
            fn_line_number = function_line_numbers.get_line_number(
                block_function_source_file, block_function_name)
            if block_function_source_file not in source_files:
                source_files[block_function_source_file] = {"functions": {},
                                                            "lines": {}}
            block_functions = \
                source_files[block_function_source_file]["functions"]
            if block_function_name not in block_functions:
                # Only create the entry when missing: an existing one may
                # already be marked as covered (another elf file or an
                # earlier block) and must not be reset to False.
                block_functions[block_function_name] = {
                    "covered": False,
                    "line_number": fn_line_number}
            # Now lets check the block code
            # The source code groups have 5 elements:
            # Function for the statements (optional), Source file for the asm
            # statements,
            # line number for the asm statements, asm statements, lookahead
            # (ignored)
            source_code_groups = re.findall(SOURCE_PATTERN, block_code,
                                            re.DOTALL | re.MULTILINE)
            is_function_block_covered = False
            # When not present the last function name applies
            statements_function_name = block_function_name
            for source_code_group in source_code_groups:
                if len(source_code_group) != 5:
                    continue
                fn_name, source_file, ln, asm_code, _ = source_code_group
                if fn_name:
                    # Usually in the first iteration fn_name is not empty and
                    # is the function's name block; otherwise the statement
                    # belongs to the most recent function
                    statements_function_name = fn_name
                if statements_function_name in function_list:
                    # Some of the functions within a block are not defined in
                    # the function list dump
                    function_list[statements_function_name]["sources"] = True
                statements_source_file = remove_workspace(source_file, prefix)
                if statements_source_file not in source_files:
                    source_files[statements_source_file] = {"functions": {},
                                                            "lines": {}}
                statements_functions = \
                    source_files[statements_source_file]["functions"]
                if statements_function_name not in statements_functions:
                    fn_line_number = function_line_numbers.get_line_number(
                        statements_source_file,
                        statements_function_name)
                    statements_functions[statements_function_name] = \
                        {"covered": False, "line_number": fn_line_number}
                if ln not in source_files[statements_source_file]["lines"]:
                    source_files[statements_source_file]["lines"][ln] = \
                        {"covered": False, "elf_index": {}}
                source_file_ln = source_files[statements_source_file]["lines"][
                    ln]
                asm_line_groups = re.findall(
                    r"(?s)([a-fA-F0-9]+):\t(.+?)(?:\n|$)",
                    asm_code, re.DOTALL | re.MULTILINE)
                for asm_line in asm_line_groups:
                    if len(asm_line) != 2:
                        continue
                    hex_line_number, opcode = asm_line
                    dec_address = int(hex_line_number, 16)
                    times_executed = 0 if dec_address not in self.traces_stats \
                        else self.traces_stats[dec_address][0]
                    if times_executed > 0:
                        is_function_block_covered = True
                        source_file_ln["covered"] = True
                        statements_functions[
                            statements_function_name]["covered"] = True
                    if elf_index not in source_file_ln["elf_index"]:
                        source_file_ln["elf_index"][elf_index] = {}
                    if dec_address not in \
                            source_file_ln["elf_index"][elf_index]:
                        source_file_ln["elf_index"][elf_index][dec_address] = (
                            opcode, times_executed)
            block_functions[block_function_name]["covered"] |= \
                is_function_block_covered

    def process_fn_no_sources(self, function_list):
        """
        Checks function coverage for functions with no dwarf signature i.e
        sources.

        The source file of each function is searched with grep in the
        local workspace and confirmed with ctags; the function is recorded
        only when exactly one source file defines it.

        :param function_list: Dictionary of functions to be checked
        """
        if not FUNCTION_LINES_ENABLED:
            return  # No source code at the workspace
        # Looked up here (instead of using the module-level 'logger')
        # because that global is only created when the file runs as a script.
        log = logging.getLogger(__name__)
        for function_name in function_list:
            # Just check if the start address is in the trace logs. The
            # function list stores the address as a hexadecimal string
            # while the trace stats are keyed by integer address.
            start = function_list[function_name]["start"]
            if not isinstance(start, int):
                start = int(start, 16)
            covered = start in self.traces_stats
            # Find the source file
            files = os_command(("grep --include *.c --include *.s -nrw '{}' {}"
                                "| cut -d: -f1").format(function_name,
                                                        self.local_workspace))
            unique_files = set(files.split())
            sources = []
            line_number = 0
            for source_file in unique_files:
                d = get_function_line_numbers(source_file)
                if function_name in d:
                    line_number = d[function_name]
                    sources.append(source_file)
            if len(sources) > 1:
                log.warning("'{}' declared in {} files:{}".format(
                    function_name, len(sources),
                    ", ".join(sources)))
            elif len(sources) == 1:
                source_file = remove_workspace(sources[0],
                                               self.local_workspace)
                if source_file not in self.source_files_coverage:
                    self.source_files_coverage[source_file] = {"functions": {},
                                                               "lines": {}}
                if function_name not in \
                        self.source_files_coverage[source_file]["functions"] or \
                        covered:
                    self.source_files_coverage[source_file]["functions"][
                        function_name] = {"covered": covered,
                                          "line_number": line_number}
            else:
                log.warning("Function '{}' not found in sources.".format(
                    function_name))
543
544
# Epilog of the command line help: describes the json configuration file
json_conf_help = """
Produces an intermediate json layer for code coverage reporting
using an input json configuration file.

Input json configuration file format:
{
    "configuration":
        {
        "remove_workspace": <true if 'workspace' must be removed from the
                             path of the source files>,
        "include_assembly": <true to include assembly source code in the
                             intermediate layer>
        },
    "parameters":
        {
        "objdump": "<Path to the objdump binary to handle dwarf signatures>",
        "readelf": "<Path to the readelf binary to handle dwarf signatures>",
        "sources": [ <List of source code origins, one or more of the next
                      options>
                    {
                    "type": "git",
                    "URL":  "<URL git repo>",
                    "COMMIT": "<Commit id>",
                    "REFSPEC": "<Refspec>",
                    "LOCATION": "<Folder within 'workspace' where this source
                                  is located>"
                    },
                    {
                    "type": "http",
                    "URL":  "<URL link to file>",
                    "COMPRESSION": "xz",
                    "LOCATION": "<Folder within 'workspace' where this source
                                  is located>"
                    }
                ],
        "workspace": "<Workspace folder where the source code was located to
                       produce the elf/axf files>",
        "output_file": "<Intermediate layer output file name and location>",
        "metadata": {<Metadata objects to be passed to the intermediate json
                      files>}
        },
    "elfs": [ <List of elf files to be traced/parsed>
            {
                "name": "<Full path name to elf/axf file>",
                "traces": [ <List of trace files to be parsed for this
                             elf/axf file>
                            "Full path name to the trace file,"
                          ]
            }
        ]
}
"""
# Toolchain binaries; set by main() from the json configuration
OBJDUMP = None
READELF = None
# Set to True by main() when a local workspace is given and ctags works
FUNCTION_LINES_ENABLED = None
# Groups: function name (optional), source file, line number, asm
# statements, lookahead function name (ignored by the consumer)
SOURCE_PATTERN = (r'(?s)([a-zA-Z0-9_]+)?(?:\(\):\n)?(^/.+?):([0-9]+)'
                  r'(?: \(.+?\))?\n(.+?)(?=\n/|([a-zA-Z0-9_]+\(\):))')
Basil Eljuse4b14afb2020-09-30 13:07:23 +0100602
603
def main():
    """
    Command line entry point

    Loads the json configuration file, sets the toolchain globals
    (OBJDUMP, READELF), enables function line numbers when a local
    workspace is given and ctags runs, and produces the intermediate
    json layer.
    """
    global OBJDUMP
    global READELF
    global FUNCTION_LINES_ENABLED

    parser = argparse.ArgumentParser(epilog=json_conf_help,
                                     formatter_class=RawTextHelpFormatter)
    parser.add_argument('--config-json', metavar='PATH',
                        dest="config_json", default='config_file.json',
                        help='JSON configuration file', required=True)
    parser.add_argument('--local-workspace', default="",
                        help=('Local workspace folder where source code files'
                              ' and folders resides'))
    args = parser.parse_args()
    try:
        with open(args.config_json, 'r') as f:
            config = json.load(f)
    except (OSError, ValueError) as ex:
        # json.JSONDecodeError is a ValueError
        print("Error at opening and processing JSON: {}".format(ex))
        return
    # Setting toolchain binary tools variables
    OBJDUMP = config['parameters']['objdump']
    READELF = config['parameters']['readelf']
    # Checking if are installed (os_command raises otherwise)
    os_command("{} --version".format(OBJDUMP))
    os_command("{} --version".format(READELF))

    if args.local_workspace != "":
        # Checking ctags installed
        try:
            os_command("ctags --version")
        except Exception:
            # Exception (not BaseException) so that Ctrl-C still aborts
            print("Warning!: ctags not installed/working function line "
                  "numbers will be set to 0. [{}]".format(
                      "sudo apt install exuberant-ctags"))
        else:
            FUNCTION_LINES_ENABLED = True

    pp = PostProcessCC(config, args.local_workspace)
    pp.process()
644
645
if __name__ == '__main__':
    # Log everything to a file next to the current working directory
    log_format = '%(asctime)s %(levelname)s %(name)s ' '%(message)s'
    logging.basicConfig(filename='intermediate_layer.log',
                        level=logging.DEBUG,
                        format=log_format)
    # Module-level logger used by the functions of this file
    logger = logging.getLogger(__name__)
    start_time = time.time()
    main()
    # Report the wall-clock time taken by the whole run
    elapsed_time = time.time() - start_time
    print("Elapsed time: {}s".format(elapsed_time))