#!/usr/bin/env python
###############################################################################
# Copyright (c) 2020, ARM Limited and Contributors. All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
###############################################################################

###############################################################################
# FILE: intermediate_layer.py
#
# DESCRIPTION: Creates an intermediate json file with information provided
#              by the configuration json file, dwarf signatures and trace
#              files.
#
###############################################################################
16
17import os
18import re
19import glob
20import argparse
21import subprocess
22import json
23from argparse import RawTextHelpFormatter
24import logging
25import time
26
__version__ = "6.0"

# Static map that defines the elf file source type in the intermediate json.
# Keys are elf base names (file name without extension); values are the index
# written to the "elf_map" section of the output. "custom_offset" is the first
# index handed out to elf names that are not listed here.
ELF_MAP = dict(
    bl1=0,
    bl2=1,
    bl31=2,
    bl32=3,
    scp_ram=10,
    scp_rom=11,
    mcp_rom=12,
    mcp_ram=13,
    custom_offset=100,
)
41
42
def os_command(command, show_command=False):
    """
    Function that executes an os command through the shell; on failure an
    exception is raised (the program is not exited here).

    :param command: OS command as string
    :param show_command: Optional argument to print the command in stdout
    :return: The string output of the os command (stdout and stderr are
             captured together)
    :raises Exception: If the command returns a non-zero exit code; the
                       message carries the command, its output and the code
    """
    if show_command:
        print("OS command: {}".format(command))
    try:
        out = subprocess.check_output(
            command, stderr=subprocess.STDOUT, shell=True)
    except subprocess.CalledProcessError as ex:
        # Decode the captured output so the message shows text instead of a
        # bytes repr (b'...').
        raise Exception(
            "Exception running command '{}': {}({})".format(
                command,
                ex.output.decode("utf8", errors="replace"),
                ex.returncode))
    # Tool output may contain bytes that are not valid UTF-8; replace them
    # rather than aborting the whole run with a UnicodeDecodeError.
    return out.decode("utf8", errors="replace")
62
63
def load_stats_from_traces(trace_globs):
    """
    Function to process and consolidate statistics from trace files

    Each trace line is expected to hold three whitespace separated fields:
    address (hex), times executed (decimal) and instruction size (decimal).
    Execution counts for the same address are added up across all files;
    the size kept is the one read last for that address.

    :param trace_globs: List of trace file patterns
    :return: Dictionary with stats from trace files i.e.
        {mem address in decimal}=(times executed, inst size)
    :raises Exception: If no file matches any of the patterns
    """
    # Resolved here so the function also works when the module is imported
    # (the module-level logger only exists when run as a script).
    log = logging.getLogger(__name__)

    # Make a set of unique trace files
    trace_files = set()
    for tg in trace_globs:
        trace_files.update(glob.glob(tg))
    if not trace_files:
        raise Exception("No trace files found for '{}'".format(trace_globs))

    hits = {}
    sizes = {}
    # Load stats from the trace files
    for trace_file in trace_files:
        try:
            with open(trace_file, 'r') as f:
                for line_number, line in enumerate(f, 1):
                    data = line.split()
                    if not data:
                        continue  # Blank line
                    try:
                        address = int(data[0], 16)
                        stat = int(data[1])
                        size = int(data[2])
                    except (IndexError, ValueError) as ex:
                        # Skip only the offending line instead of dropping
                        # the remainder of the file.
                        log.error(
                            "@Loading stats from trace files:{}:{}: {}".format(
                                trace_file, line_number, ex))
                        continue
                    sizes[address] = size
                    hits[address] = hits.get(address, 0) + stat
        except (IOError, OSError, UnicodeError) as ex:
            log.error("@Loading stats from trace files:{}".format(ex))
    # Merge the two dicts
    return {address: (hits[address], sizes[address]) for address in hits}
103
104
def get_code_sections_for_binary(elf_name):
    """
    Function to return the ranges of memory address for sections of code
    in the elf file

    The section headers are listed with objdump and the header lines that
    precede a CODE flags line are kept.

    :param elf_name: Elf binary file name
    :return: List of code sections tuples, i.e. (2nd column, 4th column as
             int, 3rd column as int) of the section header line.
             get_executable_ranges_for_binary adds the last two together to
             get the end of the section, i.e. they are used as
             (section name, initial address, section size)
    """
    log = logging.getLogger(__name__)
    command = ("%s -h %s | grep -B 1 CODE | grep -v CODE "
               "| awk '{print $2\" \"$4\" \"$3}'") % (OBJDUMP, elf_name)
    text_out = os_command(command)
    secs = []
    for sec in text_out.split('\n'):
        d = sec.split()
        if not d:
            # Nothing to parse: trailing newline or a line without fields
            # (e.g. what awk prints for a grep context separator).
            continue
        try:
            secs.append((d[0], int(d[1], 16), int(d[2], 16)))
        except (IndexError, ValueError) as ex:
            log.error(
                "@Returning memory address code sections:{}".format(ex))
    return secs
128
129
def get_executable_ranges_for_binary(elf_name):
    """
    Get function ranges from an elf file

    Mapping symbols ($x, $a, $t for code and $d for data) are read from the
    symbol table, the end of every code section is added as a marker and
    contiguous runs of code are turned into address ranges.

    :param elf_name: Elf binary file name
    :return: List of tuples for ranges i.e. (range start, range end); empty
             if no symbol or code section was found
    """
    log = logging.getLogger(__name__)
    # Parse all $x / $d symbols
    symbol_table = []
    command = r"""%s -s %s | awk '/\$[xatd]/ {print $2" "$8}'""" % (
        READELF, elf_name)
    text_out = os_command(command)
    for line in text_out.split('\n'):
        data = line.split()
        if not data:
            continue  # Trailing newline / empty line
        try:
            address = int(data[0], 16)
            _type = 'X' if data[1] in ('$x', '$t', '$a') else 'D'
        except (IndexError, ValueError) as ex:
            log.error("@Getting executable ranges:{}".format(ex))
            # Do not record anything for a line that could not be parsed
            continue
        symbol_table.append((address, _type))

    # Add markers for end of code sections
    for sec in get_code_sections_for_binary(elf_name):
        symbol_table.append((sec[1] + sec[2], 'S'))

    if not symbol_table:
        return []

    # Sort by address
    symbol_table.sort(key=lambda tup: tup[0])

    # Create ranges (list of START/END tuples)
    ranges = []
    range_start, rtype = symbol_table[0]
    for sym_address, sym_type in symbol_table:
        if sym_type != rtype:
            if rtype == 'X':
                # Substract one because the first address of the
                # next range belongs to the next range.
                ranges.append((range_start, sym_address - 1))
            range_start = sym_address
            rtype = sym_type
    return ranges
174
175
def list_of_functions_for_binary(elf_name):
    """
    Collect the function symbols found in the symbol table of an elf file

    :param elf_name: Elf binary file name
    :return: Dictionary addressed by function name; every entry holds
             'start' (first column of the symbol line, hex string),
             'end' (the hex column that follows the section column) and
             'sources' (initialised to False here)
    """
    symbol_rex = r'([0-9a-fA-F]+) (.{7}) ([^ ]+)[ \t]([0-9a-fA-F]+) (.*)'
    hidden_tag = '.hidden '
    dump = os_command(
        "%s -t %s | awk 'NR>4' | sed /^$/d" % (OBJDUMP, elf_name))
    found = {}
    # The text after the last newline is not a symbol line
    for entry in dump.split('\n')[:-1]:
        try:
            start, flags, _section, end, name = re.findall(
                symbol_rex, entry)[0]
            if 'F' in flags:
                # We don't want the .hidden for hidden functions
                if name.startswith(hidden_tag):
                    name = name[len(hidden_tag):]
                if name in found:
                    logger.warning("'{}' duplicated in '{}'".format(
                        name,
                        elf_name))
                else:
                    found[name] = {'start': start,
                                   'end': end,
                                   'sources': False}
        except Exception as ex:
            logger.error("@Listing functions at file {}: {}".format(
                elf_name,
                ex))
    return found
212
213
def apply_functions_exclude(elf_config, functions):
    """
    Remove excluded functions from the list of functions

    A function is excluded when its name matches (from the start of the
    name) any of the regular expressions in elf_config['exclude_functions'].

    :param elf_config: Config for elf binary file
    :param functions: Dictionary of functions in the binary elf file
                      addressed by function name
    :return: Tuple with included and excluded functions, both dictionaries
             addressed by function name
    """
    if 'exclude_functions' not in elf_config:
        # Nothing to filter; the excluded part is an empty dictionary so
        # both return paths hand back the same types.
        return functions, {}
    patterns = elf_config['exclude_functions']
    incl = {}
    excl = {}
    for fname, details in functions.items():
        if any(re.match(rex, fname) for rex in patterns):
            excl[fname] = details
        else:
            incl[fname] = details
    return incl, excl
236
237
def remove_workspace(path, workspace):
    """
    Express a path relative to a given workspace

    :param path: Path to be converted
    :param workspace: Folder the returned path is made relative to, or None
                      to leave the path untouched
    :return: 'path' relative to 'workspace', or 'path' itself when
             'workspace' is None
    """
    if workspace is None:
        return path
    return os.path.relpath(path, workspace)
248
249
def get_function_line_numbers(source_file):
    """
    Using ctags get all the function names with their line numbers
    within the source_file

    Lines of kind "function" are stored under their first column; lines of
    kind "label" whose first column is "func" are stored under their last
    column.

    :param source_file: Source file passed to ctags
    :return: Dictionary with function name as key and line number as value
    :raises Exception: If the ctags command fails (raised by os_command)
    """
    log = logging.getLogger(__name__)
    function_lines = os_command(
        "ctags -x --c-kinds=f {}".format(source_file)).split("\n")
    fln = {}
    incomplete = False
    for line in function_lines:
        cols = line.split()
        if len(cols) < 3:
            continue
        try:
            if cols[1] == "function":
                fln[cols[0]] = int(cols[2])
            elif cols[1] == "label" and cols[0] == "func":
                fln[cols[-1]] = int(cols[2])
        except ValueError:
            # Only this line is lost; keep collecting the remaining ones
            incomplete = True
    if incomplete:
        log.warning("Warning: Can't get all function line numbers from %s" %
                    source_file)
    return fln
273
274
class FunctionLineNumbers(object):
    """Looks up the line number of a function inside a source file, keeping
    the per-file results so every file is only parsed once.
    """

    def __init__(self, workspace):
        """
        :param workspace: Folder that the file names passed to
                          get_line_number are joined to
        """
        self.workspace = workspace
        # {file name} => {function name} => line number
        self.filenames = {}

    def get_line_number(self, filename, function_name):
        """
        :param filename: Source file name, joined to the workspace folder
        :param function_name: Name of the function to look for
        :return: Line number of the function, or 0 when function line
                 numbers are disabled or the function is not found
        """
        if not FUNCTION_LINES_ENABLED:
            return 0
        parsed = self.filenames
        if filename not in parsed:
            full_path = os.path.join(self.workspace, filename)
            parsed[filename] = get_function_line_numbers(full_path)
        return parsed[filename].get(function_name, 0)
289
290
class PostProcessCC(object):
    """Class used to process the trace data along with the dwarf
    signature files to produce an intermediate layer in json with
    code coverage in assembly and c source code.
    """

    def __init__(self, _config, local_workspace):
        """
        :param _config: Dictionary loaded from the json configuration file;
                        the 'elfs' key is read here
        :param local_workspace: Local folder where the source code resides
        """
        self._data = {}
        self.config = _config
        self.local_workspace = local_workspace
        self.elfs = self.config['elfs']
        # Dictionary with stats from trace files {address}=(times executed,
        # inst size)
        self.traces_stats = {}
        # Dictionary of unique assembly line memory address against source
        # file location
        # {assembly address} = (opcode, source file location, line number in
        # the source file, times executed)
        self.asm_lines = {}
        # Dictionary of {source file location}=>{'lines': {'covered':Boolean,
        # 'elf_index'; {elf index}=>{assembly address}=>(opcode,
        # times executed),
        # 'functions': {function name}=>is covered(boolean)}
        self.source_files_coverage = {}
        self.functions = []
        # Unique set of elf list of files
        self.elf_map = {}
        # For elf custom mappings
        self.elf_custom = None

    @staticmethod
    def _start_address(function_entry):
        """Return the 'start' address of a function entry as an integer."""
        start = function_entry["start"]
        # The function dump stores addresses as hex strings while the trace
        # stats are keyed by integers.
        return start if isinstance(start, int) else int(start, 16)

    def process(self):
        """
        Public method to process the trace files and dwarf signatures
        using the information contained in the json configuration file.
        This method writes the intermediate json file output linking
        the trace data and c source and assembly code.
        """
        self.source_files_coverage = {}
        self.asm_lines = {}
        # Initialize for unknown elf files
        self.elf_custom = ELF_MAP["custom_offset"]
        sources_config = {}
        print("Generating intermediate json layer '{}'...".format(
            self.config['parameters']['output_file']))
        for elf in self.elfs:
            # Gather information
            elf_name = elf['name']
            # Fails (os_command raises) if the elf file cannot be listed
            os_command("ls {}".format(elf_name))
            # Trace data
            self.traces_stats = load_stats_from_traces(elf['traces'])
            prefix = self.config['parameters']['workspace'] \
                if self.config['configuration']['remove_workspace'] else \
                None
            functions_list = list_of_functions_for_binary(elf_name)
            (functions_list, _excluded_functions) = apply_functions_exclude(
                elf, functions_list)
            # Produce code coverage
            self.dump_sources(elf_name, functions_list, prefix)
            sources_config = self.config['parameters']['sources']
            # Now check code coverage in the functions with no dwarf signature
            # (sources)
            nf = {f: functions_list[f] for f in
                  functions_list if not
                  functions_list[f]["sources"]}
            self.process_fn_no_sources(nf)
        # Write to the intermediate json file
        data = {"source_files": self.source_files_coverage,
                "configuration": {
                    "sources": sources_config,
                    "metadata": "" if 'metadata' not in
                                      self.config['parameters'] else
                    self.config['parameters']['metadata'],
                    "elf_map": self.elf_map
                }
                }
        json_data = json.dumps(data, indent=4, sort_keys=True)
        with open(self.config['parameters']['output_file'], "w") as f:
            f.write(json_data)

    def dump_sources(self, elf_filename, function_list, prefix=None):
        """
        Process an elf file i.e. match the source and asm lines against trace
        files (coverage).

        :param elf_filename: Elf binary file name
        :param function_list: Dictionary of functions in the elf file
                              addressed by function name; the 'sources' flag
                              of every function found with source
                              information is set to True here
        :param prefix: Optional path name to be removed at the start of source
                       file locations
        """
        command = "%s -Sl %s" % (OBJDUMP, elf_filename)
        dump = os_command(command)
        dump += "\n"  # For pattern matching the last \n
        elf_name = os.path.splitext(os.path.basename(elf_filename))[0]
        # Object that handles the function line numbers in
        # their filename
        function_line_numbers = FunctionLineNumbers(self.local_workspace)
        # To map the elf filename against an index
        if elf_name not in self.elf_map:
            if elf_name in ELF_MAP:
                self.elf_map[elf_name] = ELF_MAP[elf_name]
            else:
                self.elf_map[elf_name] = self.elf_custom
                self.elf_custom += 1
        elf_index = self.elf_map[elf_name]
        # The function groups have 2 elements:
        # Function's block name, Function's block code
        function_groups = re.findall(
            r"(?s)[0-9a-fA-F]+ <([a-zA-Z0-9_]+)>:\n(.+?)(?:\r*\n\n|\n$)",
            dump, re.DOTALL | re.MULTILINE)
        # Pointer to files dictionary
        source_files = self.source_files_coverage
        for function_group in function_groups:
            if len(function_group) != 2:
                continue
            block_function_name, block_code = function_group
            block_code += "\n"
            # Find if the function has C source code filename
            function_signature_group = re.findall(
                r"(?s){}\(\):\n(/.+?):[0-9]+.*(?:\r*\n\n|\n$)".format(
                    block_function_name), block_code, re.DOTALL | re.MULTILINE)
            if not function_signature_group:
                continue  # Function does not have dwarf signature (sources)
            if block_function_name not in function_list:
                print("Warning:Function '{}' not found in function list!!!"
                      .format(block_function_name))
                continue  # Function not found in function list
            function_list[block_function_name]["sources"] = True
            block_function_source_file = remove_workspace(
                function_signature_group[0], prefix)
            fn_line_number = function_line_numbers.get_line_number(
                block_function_source_file, block_function_name)
            if block_function_source_file not in source_files:
                source_files[block_function_source_file] = {"functions": {},
                                                            "lines": {}}
            source_files[block_function_source_file]["functions"][
                block_function_name] = {"covered": False,
                                        "line_number": fn_line_number}
            # Now lets check the block code
            # The source code groups have 5 elements:
            # Function for the statements (optional), Source file for the asm
            # statements,
            # line number for the asm statements, asm statements, lookahead
            # (ignored)
            source_code_groups = re.findall(SOURCE_PATTERN, block_code,
                                            re.DOTALL | re.MULTILINE)
            is_function_block_covered = False
            # When not present the last function name applies
            statements_function_name = block_function_name
            for source_code_group in source_code_groups:
                if len(source_code_group) != 5:
                    continue
                fn_name, source_file, ln, asm_code, _ = source_code_group
                if fn_name:
                    # Usually in the first iteration fn_name is not empty and
                    # is the function's name block. When it is empty the
                    # statement belongs to the most recent function.
                    statements_function_name = fn_name
                if statements_function_name in function_list:
                    # Some of the functions within a block are not defined in
                    # the function list dump
                    function_list[statements_function_name]["sources"] = True
                statements_source_file = remove_workspace(source_file, prefix)
                if statements_source_file not in source_files:
                    source_files[statements_source_file] = {"functions": {},
                                                            "lines": {}}
                file_entry = source_files[statements_source_file]
                if statements_function_name not in file_entry["functions"]:
                    fn_line_number = function_line_numbers.get_line_number(
                        statements_source_file,
                        statements_function_name)
                    file_entry["functions"][statements_function_name] = \
                        {"covered": False, "line_number": fn_line_number}
                if ln not in file_entry["lines"]:
                    file_entry["lines"][ln] = \
                        {"covered": False, "elf_index": {}}
                source_file_ln = file_entry["lines"][ln]
                asm_line_groups = re.findall(
                    r"(?s)([a-fA-F0-9]+):\t(.+?)(?:\n|$)",
                    asm_code, re.DOTALL | re.MULTILINE)
                for asm_line in asm_line_groups:
                    if len(asm_line) != 2:
                        continue
                    hex_line_number, opcode = asm_line
                    dec_address = int(hex_line_number, 16)
                    times_executed = 0 if dec_address not in self.traces_stats \
                        else self.traces_stats[dec_address][0]
                    if times_executed > 0:
                        is_function_block_covered = True
                        source_file_ln["covered"] = True
                        file_entry["functions"][
                            statements_function_name]["covered"] = True
                    if elf_index not in source_file_ln["elf_index"]:
                        source_file_ln["elf_index"][elf_index] = {}
                    if dec_address not in \
                            source_file_ln["elf_index"][elf_index]:
                        source_file_ln["elf_index"][elf_index][dec_address] = (
                            opcode, times_executed)
            source_files[block_function_source_file]["functions"][
                block_function_name]["covered"] |= is_function_block_covered

    def process_fn_no_sources(self, function_list):
        """
        Checks function coverage for functions with no dwarf signature i.e
        sources.

        The source file of each function is searched in the local workspace
        with grep and confirmed with ctags; the function is recorded only
        when exactly one file defines it.

        :param function_list: Dictionary of functions to be checked
        """
        if not FUNCTION_LINES_ENABLED:
            return  # No source code at the workspace
        log = logging.getLogger(__name__)
        for function_name in function_list:
            # Just check if the start address is in the trace logs. The
            # address is converted to int first: the trace stats are keyed
            # by integer addresses.
            covered = self._start_address(
                function_list[function_name]) in self.traces_stats
            # Find the source file
            files = os_command(("grep --include *.c --include *.s -nrw '{}' {}"
                                "| cut -d: -f1").format(function_name,
                                                        self.local_workspace))
            unique_files = set(files.split())
            sources = []
            line_number = 0
            for source_file in unique_files:
                d = get_function_line_numbers(source_file)
                if function_name in d:
                    line_number = d[function_name]
                    sources.append(source_file)
            if len(sources) > 1:
                log.warning("'{}' declared in {} files:{}".format(
                    function_name, len(sources),
                    ", ".join(sources)))
            elif len(sources) == 1:
                source_file = remove_workspace(sources[0],
                                               self.local_workspace)
                if source_file not in self.source_files_coverage:
                    self.source_files_coverage[source_file] = {"functions": {},
                                                               "lines": {}}
                if function_name not in \
                        self.source_files_coverage[source_file]["functions"] or \
                        covered:
                    self.source_files_coverage[source_file]["functions"][
                        function_name] = {"covered": covered,
                                          "line_number": line_number}
            else:
                log.warning("Function '{}' not found in sources.".format(
                    function_name))
539
540
# Text shown as the epilog of the command line help
json_conf_help = """
Produces an intermediate json layer for code coverage reporting
using an input json configuration file.

Input json configuration file format:
{
    "configuration":
        {
        "remove_workspace": <true if 'workspace' must be removed from the
                                path of the source files>,
        "include_assembly": <true to include assembly source code in the
                            intermediate layer>
        },
    "parameters":
        {
        "objdump": "<Path to the objdump binary to handle dwarf signatures>",
        "readelf": "<Path to the readelf binary to handle dwarf signatures>",
        "sources": [ <List of source code origins, one or more of the next
                        options>
                    {
                    "type": "git",
                    "URL":  "<URL git repo>",
                    "COMMIT": "<Commit id>",
                    "REFSPEC": "<Refspec>",
                    "LOCATION": "<Folder within 'workspace' where this source
                                is located>"
                    },
                    {
                    "type": "http",
                    "URL":  "<URL link to file>",
                    "COMPRESSION": "xz",
                    "LOCATION": "<Folder within 'workspace' where this source
                                is located>"
                    }
                ],
        "workspace": "<Workspace folder where the source code was located to
                        produce the elf/axf files>",
        "output_file": "<Intermediate layer output file name and location>",
        "metadata": {<Metadata objects to be passed to the intermediate json
                    files>}
        },
    "elfs": [ <List of elf files to be traced/parsed>
            {
                    "name": "<Full path name to elf/axf file>",
                    "traces": [ <List of trace files to be parsed for this
                                elf/axf file>
                                "Full path name to the trace file,"
                              ]
                }
        ]
}
"""
# Toolchain binaries, set by main() from the json configuration
OBJDUMP = None
READELF = None
# Set to True by main() when a local workspace is given and ctags works
FUNCTION_LINES_ENABLED = None
# Pattern for the chunks of a function block in the 'objdump -Sl' output.
# Groups: function name (optional), source file, line number, asm statements
# and the function name seen by the lookahead (ignored).
# Function names may contain any decimal digit, hence the 0-9 ranges.
SOURCE_PATTERN = (r'(?s)([a-zA-Z0-9_]+)?(?:\(\):\n)?(^/.+?):([0-9]+)'
                  r'(?: \(.+?\))?\n(.+?)(?=\n/|\n$|([a-zA-Z0-9_]+\(\):))')
598
599
def main():
    """
    Command line entry point: parses the arguments, loads the json
    configuration file, checks that the configured objdump/readelf binaries
    (and ctags, when a local workspace is given) can be run and produces the
    intermediate json layer.
    """
    global OBJDUMP
    global READELF
    global FUNCTION_LINES_ENABLED

    parser = argparse.ArgumentParser(epilog=json_conf_help,
                                     formatter_class=RawTextHelpFormatter)
    parser.add_argument('--config-json', metavar='PATH',
                        dest="config_json", default='config_file.json',
                        help='JSON configuration file', required=True)
    parser.add_argument('--local-workspace', default="",
                        help=('Local workspace folder where source code files'
                              ' and folders resides'))
    args = parser.parse_args()
    try:
        with open(args.config_json, 'r') as f:
            config = json.load(f)
    except Exception as ex:
        print("Error at opening and processing JSON: {}".format(ex))
        return
    # Setting toolchain binary tools variables
    OBJDUMP = config['parameters']['objdump']
    READELF = config['parameters']['readelf']
    # Checking if are installed (os_command raises if they cannot be run)
    os_command("{} --version".format(OBJDUMP))
    os_command("{} --version".format(READELF))

    # Function line numbers need both a local workspace and a working ctags
    FUNCTION_LINES_ENABLED = False
    if args.local_workspace != "":
        # Checking ctags installed
        try:
            os_command("ctags --version")
        except Exception:
            # os_command reports failures with Exception; interrupts such as
            # Ctrl-C are deliberately not swallowed here.
            print("Warning!: ctags not installed/working, function line "
                  "numbers will be set to 0. [{}]".format(
                      "sudo apt install exuberant-ctags"))
        else:
            FUNCTION_LINES_ENABLED = True

    pp = PostProcessCC(config, args.local_workspace)
    pp.process()
639 pp.process()
640
641
if __name__ == '__main__':
    # Everything logged during the run goes to a file next to the working
    # directory; 'logger' is the module-level logger used by the functions
    # of this script.
    logging.basicConfig(
        filename='intermediate_layer.log',
        level=logging.DEBUG,
        format='%(asctime)s %(levelname)s %(name)s %(message)s')
    logger = logging.getLogger(__name__)
    # Report how long the whole processing took
    start_time = time.time()
    main()
    elapsed_time = time.time() - start_time
    print("Elapsed time: {}s".format(elapsed_time))
650 print("Elapsed time: {}s".format(elapsed_time))