Blame - coverage-tool/coverage-reporting/intermediate_layer.py - ci/qa-tools

blob: 2d11824aa0a48a993deb2263c455596c26fcb35a [file] [log] [blame]

#!/usr/bin/env python
###############################################################################
# Copyright (c) 2020, ARM Limited and Contributors. All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
###############################################################################

###############################################################################
# FILE: intermediate_layer.py
#
# DESCRIPTION: Creates an intermediate json file with information provided
#              by the configuration json file, dwarf signatures and trace
#              files.
#
###############################################################################
				16
import argparse
import glob
import json
import logging
import os
import re
import subprocess
import time
from argparse import RawTextHelpFormatter
				26
				27	__version__ = "6.0"
				28
				29	# Static map that defines the elf file source type in the intermediate json
				30	ELF_MAP = {
				31	"bl1": 0,
				32	"bl2": 1,
				33	"bl31": 2,
				34	"bl32": 3,
				35	"scp_ram": 10,
				36	"scp_rom": 11,
				37	"mcp_rom": 12,
				38	"mcp_ram": 13,
				39	"custom_offset": 100
				40	}
				41
				42
				43	def os_command(command, show_command=False):
				44	"""
				45	Function that execute an os command, on fail exit the program
				46
				47	:param command: OS command as string
				48	:param show_command: Optional argument to print the command in stdout
				49	:return: The string output of the os command
				50	"""
				51	out = ""
				52	try:
				53	if show_command:
				54	print("OS command: {}".format(command))
				55	out = subprocess.check_output(
				56	command, stderr=subprocess.STDOUT, shell=True)
				57	except subprocess.CalledProcessError as ex:
				58	raise Exception(
				59	"Exception running command '{}': {}({})".format(
				60	command, ex.output, ex.returncode))
				61	return out.decode("utf8")
				62
				63
				64	def load_stats_from_traces(trace_globs):
				65	"""
				66	Function to process and consolidate statistics from trace files
				67
				68	:param trace_globs: List of trace file patterns
				69	:return: Dictionary with stats from trace files i.e.
				70	{mem address in decimal}=(times executed, inst size)
				71	"""
				72	stats = {}
				73	stat_size = {}
				74
				75	# Make a list of unique trace files
				76	trace_files = []
				77	for tg in trace_globs:
				78	trace_files.extend(glob.glob(tg))
				79	trace_files = set(trace_files)
				80
				81	if not trace_files:
				82	raise Exception("No trace files found for '{}'".format(trace_globs))
				83	# Load stats from the trace files
				84	for trace_file in trace_files:
				85	try:
				86	with open(trace_file, 'r') as f:
				87	for line in f:
				88	data = line.split()
				89	address = int(data[0], 16)
				90	stat = int(data[1])
				91	size = int(data[2])
				92	stat_size[address] = size
				93	if address in stats:
				94	stats[address] += stat
				95	else:
				96	stats[address] = stat
				97	except Exception as ex:
				98	logger.error("@Loading stats from trace files:{}".format(ex))
				99	# Merge the two dicts
				100	for address in stats:
				101	stats[address] = (stats[address], stat_size[address])
				102	return stats
				103
				104
				105	def get_code_sections_for_binary(elf_name):
				106	"""
				107	Function to return the ranges of memory address for sections of code
				108	in the elf file
				109
				110	:param elf_name: Elf binary file name
				111	:return: List of code sections tuples, i.e. (section type, initial
				112	address, end address)
				113	"""
				114	command = """%s -h %s \| grep -B 1 CODE \| grep -v CODE \
				115	\| awk '{print $2" "$4" "$3}'""" % (OBJDUMP, elf_name)
				116	text_out = os_command(command)
				117	sections = text_out.split('\n')
				118	sections.pop()
				119	secs = []
				120	for sec in sections:
				121	try:
				122	d = sec.split()
				123	secs.append((d[0], int(d[1], 16), int(d[2], 16)))
				124	except Exception as ex:
				125	logger.error(
				126	"@Returning memory address code sections:".format(ex))
				127	return secs
				128
				129
				130	def get_executable_ranges_for_binary(elf_name):
				131	"""
				132	Get function ranges from an elf file
				133
				134	:param elf_name: Elf binary file name
				135	:return: List of tuples for ranges i.e. (range start, range end)
				136	"""
				137	# Parse all $x / $d symbols
				138	symbol_table = []
				139	command = r"""%s -s %s \| awk '/\$[xatd]/ {print $2" "$8}'""" % (
				140	READELF, elf_name)
				141	text_out = os_command(command)
				142	lines = text_out.split('\n')
				143	lines.pop()
				144	for line in lines:
				145	try:
				146	data = line.split()
				147	address = int(data[0], 16)
				148	_type = 'X' if data[1] in ['$x', '$t', '$a'] else 'D'
				149	except Exception as ex:
				150	logger.error("@Getting executable ranges:".format(ex))
				151	symbol_table.append((address, _type))
				152
				153	# Add markers for end of code sections
				154	sections = get_code_sections_for_binary(elf_name)
				155	for sec in sections:
				156	symbol_table.append((sec[1] + sec[2], 'S'))
				157
				158	# Sort by address
				159	symbol_table = sorted(symbol_table, key=lambda tup: tup[0])
				160
				161	# Create ranges (list of START/END tuples)
				162	ranges = []
				163	range_start = symbol_table[0][0]
				164	rtype = symbol_table[0][1]
				165	for sym in symbol_table:
				166	if sym[1] != rtype:
				167	if rtype == 'X':
				168	# Substract one because the first address of the
				169	# next range belongs to the next range.
				170	ranges.append((range_start, sym[0] - 1))
				171	range_start = sym[0]
				172	rtype = sym[1]
				173	return ranges
				174
				175
				176	def list_of_functions_for_binary(elf_name):
				177	"""
				178	Get an array of the functions in the elf file
				179
				180	:param elf_name: Elf binary file name
				181	:return: An array of function address start, function address end,
				182	function dwarf signature (sources) addressed by function name
				183	"""
				184	_functions = {}
				185	command = "%s -t %s \| awk 'NR>4' \| sed /^$/d" % (OBJDUMP, elf_name)
				186	symbols_output = os_command(command)
				187	rex = r'([0-9a-fA-F]+) (.{7}) ([^ ]+)[ \t]([0-9a-fA-F]+) (.*)'
				188	symbols = symbols_output.split('\n')[:-1]
				189	for sym in symbols:
				190	try:
				191	symbol_details = re.findall(rex, sym)
				192	symbol_details = symbol_details[0]
				193	if 'F' not in symbol_details[1]:
				194	continue
				195	function_name = symbol_details[4]
				196	# We don't want the .hidden for hidden functions
				197	if function_name.startswith('.hidden '):
				198	function_name = function_name[len('.hidden '):]
				199	if function_name not in _functions:
				200	_functions[function_name] = {'start': symbol_details[0],
				201	'end': symbol_details[3],
				202	'sources': False}
				203	else:
				204	logger.warning("'{}' duplicated in '{}'".format(
				205	function_name,
				206	elf_name))
				207	except Exception as ex:
				208	logger.error("@Listing functions at file {}: {}".format(
				209	elf_name,
				210	ex))
				211	return _functions
				212
				213
				214	def apply_functions_exclude(elf_config, functions):
				215	"""
				216	Remove excluded functions from the list of functions
				217
				218	:param elf_config: Config for elf binary file
				219	:param functions: Array of functions in the binary elf file
				220	:return: Tuple with included and excluded functions
				221	"""
				222	if 'exclude_functions' not in elf_config:
				223	return functions, []
				224	incl = {}
				225	excl = {}
				226	for fname in functions:
				227	exclude = False
				228	for rex in elf_config['exclude_functions']:
				229	if re.match(rex, fname):
				230	exclude = True
				231	excl[fname] = functions[fname]
				232	break
				233	if not exclude:
				234	incl[fname] = functions[fname]
				235	return incl, excl
				236
				237
				238	def remove_workspace(path, workspace):
				239	"""
				240	Get the relative path to a given workspace
				241
				242	:param path: Path relative to the workspace to be returned
				243	:param workspace: Path.
				244	"""
				245	ret = path if workspace is None else os.path.relpath(path, workspace)
				246	# print("{} => {}".format(path, ret))
				247	return ret
				248
				249
				250	def get_function_line_numbers(source_file):
				251	"""
				252	Using ctags get all the function names with their line numbers
				253	within the source_file
				254
				255	:return: Dictionary with function name as key and line number as value
				256	"""
				257	function_lines = os_command(
				258	"ctags -x --c-kinds=f {}".format(source_file)).split("\n")
				259	fln = {}
				260	try:
				261	for line in function_lines:
				262	cols = line.split()
				263	if len(cols) < 3:
				264	continue
				265	if cols[1] == "function":
				266	fln[cols[0]] = int(cols[2])
				267	elif cols[1] == "label" and cols[0] == "func":
				268	fln[cols[-1]] = int(cols[2])
				269	except BaseException:
				270	logger.warning("Warning: Can't get all function line numbers from %s" %
				271	source_file)
				272	return fln
				273
				274
				275	class FunctionLineNumbers(object):
				276
				277	def __init__(self, workspace):
				278	self.filenames = {}
				279	self.workspace = workspace
				280
				281	def get_line_number(self, filename, function_name):
				282	if not FUNCTION_LINES_ENABLED:
				283	return 0
				284	if filename not in self.filenames:
				285	newp = os.path.join(self.workspace, filename)
				286	self.filenames[filename] = get_function_line_numbers(newp)
				287	return 0 if function_name not in self.filenames[filename] else \
				288	self.filenames[filename][function_name]
				289
				290
				291	class PostProcessCC(object):
				292	"""Class used to process the trace data along with the dwarf
				293	signature files to produce an intermediate layer in json with
				294	code coverage in assembly and c source code.
				295	"""
				296
				297	def __init__(self, _config, local_workspace):
				298	self._data = {}
				299	self.config = _config
				300	self.local_workspace = local_workspace
				301	self.elfs = self.config['elfs']
				302	# Dictionary with stats from trace files {address}=(times executed,
				303	# inst size)
				304	self.traces_stats = {}
				305	# Dictionary of unique assembly line memory address against source
				306	# file location
				307	# {assembly address} = (opcode, source file location, line number in
				308	# the source file, times executed)
				309	self.asm_lines = {}
				310	# Dictionary of {source file location}=>{'lines': {'covered':Boolean,
				311	# 'elf_index'; {elf index}=>{assembly address}=>(opcode,
				312	# times executed),
				313	# 'functions': {function name}=>is covered(boolean)}
				314	self.source_files_coverage = {}
				315	self.functions = []
				316	# Unique set of elf list of files
				317	self.elf_map = {}
				318	# For elf custom mappings
				319	self.elf_custom = None
				320
				321	def process(self):
				322	"""
				323	Public method to process the trace files and dwarf signatures
				324	using the information contained in the json configuration file.
				325	This method writes the intermediate json file output linking
				326	the trace data and c source and assembly code.
				327	"""
				328	self.source_files_coverage = {}
				329	self.asm_lines = {}
				330	# Initialize for unknown elf files
				331	self.elf_custom = ELF_MAP["custom_offset"]
				332	sources_config = {}
				333	print("Generating intermediate json layer '{}'...".format(
				334	self.config['parameters']['output_file']))
				335	for elf in self.elfs:
				336	# Gather information
				337	elf_name = elf['name']
				338	os_command("ls {}".format(elf_name))
				339	# Trace data
				340	self.traces_stats = load_stats_from_traces(elf['traces'])
				341	prefix = self.config['parameters']['workspace'] \
				342	if self.config['configuration']['remove_workspace'] else \
				343	None
				344	functions_list = list_of_functions_for_binary(elf_name)
				345	(functions_list, excluded_functions) = apply_functions_exclude(
				346	elf, functions_list)
				347	# Produce code coverage
				348	self.dump_sources(elf_name, functions_list, prefix)
				349	sources_config = self.config['parameters']['sources']
				350	# Now check code coverage in the functions with no dwarf signature
				351	# (sources)
				352	nf = {f: functions_list[f] for f in
				353	functions_list if not
				354	functions_list[f]["sources"]}
				355	self.process_fn_no_sources(nf)
				356	# Write to the intermediate json file
				357	data = {"source_files": self.source_files_coverage,
				358	"configuration": {
				359	"sources": sources_config,
				360	"metadata": "" if 'metadata' not in
				361	self.config['parameters'] else
				362	self.config['parameters']['metadata'],
				363	"elf_map": self.elf_map
				364	}
				365	}
				366	json_data = json.dumps(data, indent=4, sort_keys=True)
				367	with open(self.config['parameters']['output_file'], "w") as f:
				368	f.write(json_data)
				369
				370	def dump_sources(self, elf_filename, function_list, prefix=None):
				371	"""
				372	Process an elf file i.e. match the source and asm lines against trace
				373	files (coverage).
				374
				375	:param elf_filename: Elf binary file name
				376	:param function_list: List of functions in the elf file i.e.
				377	[(address start, address end, function name)]
				378	:param prefix: Optional path name to be removed at the start of source
				379	file locations
				380	"""
				381	command = "%s -Sl %s" % (OBJDUMP, elf_filename)
				382	dump = os_command(command)
				383	dump += "\n" # For pattern matching the last \n
				384	elf_name = os.path.splitext(os.path.basename(elf_filename))[0]
				385	# Object that handles the function line numbers in
				386	# their filename
				387	function_line_numbers = FunctionLineNumbers(self.local_workspace)
				388	# To map the elf filename against an index
				389	if elf_name not in self.elf_map:
				390	if elf_name in ELF_MAP:
				391	self.elf_map[elf_name] = ELF_MAP[elf_name]
				392	else:
				393	self.elf_map[elf_name] = self.elf_custom
				394	self.elf_custom += 1
				395	elf_index = self.elf_map[elf_name]
				396	# The function groups have 2 elements:
				397	# Function's block name, Function's block code
				398	function_groups = re.findall(
				399	r"(?s)[0-9a-fA-F]+ <([a-zA-Z0-9_]+)>:\n(.+?)(?:\r*\n\n\|\n$)",
				400	dump, re.DOTALL \| re.MULTILINE)
				401	# Pointer to files dictionary
				402	source_files = self.source_files_coverage
				403	for function_group in function_groups:
				404	if len(function_group) != 2:
				405	continue
				406	block_function_name, block_code = function_group
				407	block_code += "\n"
				408	# Find if the function has C source code filename
				409	function_signature_group = re.findall(
				410	r"(?s){}:\n(/.+?):[0-9]+.(?:\r\n\n\|\n$)".format(
				411	block_function_name), block_code, re.DOTALL \| re.MULTILINE)
				412	if not function_signature_group:
				413	continue # Function does not have dwarf signature (sources)
saul-romero-arm	c803014	2021-01-15 10:34:06 +0000	[diff] [blame]	414	if not block_function_name in function_list:
				415	print("Warning:Function '{}' not found in function list!!!".format(block_function_name))
				416	continue # Function not found in function list
Basil Eljuse	4b14afb	2020-09-30 13:07:23 +0100	[diff] [blame]	417	function_list[block_function_name]["sources"] = True
				418	block_function_source_file = remove_workspace(
				419	function_signature_group[0], prefix)
				420	fn_line_number = function_line_numbers.get_line_number(
				421	block_function_source_file, block_function_name)
				422	if block_function_source_file not in source_files:
				423	source_files[block_function_source_file] = {"functions": {},
				424	"lines": {}}
				425	source_files[block_function_source_file]["functions"][
				426	block_function_name] = {"covered": False,
				427	"line_number": fn_line_number}
				428	# Now lets check the block code
				429	# The source code groups have 5 elements:
				430	# Function for the statements (optional), Source file for the asm
				431	# statements,
				432	# line number for the asm statements, asm statements, lookahead
				433	# (ignored)
				434	source_code_groups = re.findall(SOURCE_PATTERN, block_code,
				435	re.DOTALL \| re.MULTILINE)
				436	is_function_block_covered = False
				437	# When not present the last function name applies
				438	statements_function_name = block_function_name
				439	for source_code_group in source_code_groups:
				440	if len(source_code_group) != 5:
				441	continue
				442	fn_name, source_file, ln, asm_code, _ = source_code_group
				443	if not fn_name:
				444	# The statement belongs to the most recent function
				445	fn_name = statements_function_name
				446	else:
				447	# Usually in the first iteration fn_name is not empty and
				448	# is the function's name block
				449	statements_function_name = fn_name
				450	if statements_function_name in function_list:
				451	# Some of the functions within a block are not defined in
				452	# the function list dump
				453	function_list[statements_function_name]["sources"] = True
				454	statements_source_file = remove_workspace(source_file, prefix)
				455	if statements_source_file not in source_files:
				456	source_files[statements_source_file] = {"functions": {},
				457	"lines": {}}
				458	if statements_function_name not in \
				459	source_files[statements_source_file]["functions"]:
				460	fn_line_number = function_line_numbers.get_line_number(
				461	statements_source_file,
				462	statements_function_name)
				463	source_files[statements_source_file]["functions"][
				464	statements_function_name] = \
				465	{"covered": False, "line_number": fn_line_number}
				466	if ln not in source_files[statements_source_file]["lines"]:
				467	source_files[statements_source_file]["lines"][ln] = \
				468	{"covered": False, "elf_index": {}}
				469	source_file_ln = source_files[statements_source_file]["lines"][
				470	ln]
				471	asm_line_groups = re.findall(
				472	r"(?s)([a-fA-F0-9]+):\t(.+?)(?:\n\|$)",
				473	asm_code, re.DOTALL \| re.MULTILINE)
				474	for asm_line in asm_line_groups:
				475	if len(asm_line) != 2:
				476	continue
				477	hex_line_number, opcode = asm_line
				478	dec_address = int(hex_line_number, 16)
				479	times_executed = 0 if dec_address not in self.traces_stats \
				480	else self.traces_stats[dec_address][0]
				481	if times_executed > 0:
				482	is_function_block_covered = True
				483	source_file_ln["covered"] = True
				484	source_files[statements_source_file]["functions"][
				485	statements_function_name]["covered"] = True
				486	if elf_index not in source_file_ln["elf_index"]:
				487	source_file_ln["elf_index"][elf_index] = {}
				488	if dec_address not in \
				489	source_file_ln["elf_index"][elf_index]:
				490	source_file_ln["elf_index"][elf_index][dec_address] = (
				491	opcode, times_executed)
				492	source_files[block_function_source_file]["functions"][
				493	block_function_name]["covered"] \|= is_function_block_covered
				494
				495	def process_fn_no_sources(self, function_list):
				496	"""
				497	Checks function coverage for functions with no dwarf signature i.e
				498	sources.
				499
				500	:param function_list: Dictionary of functions to be checked
				501	"""
				502	if not FUNCTION_LINES_ENABLED:
				503	return # No source code at the workspace
				504	address_seq = sorted(self.traces_stats.keys())
				505	for function_name in function_list:
				506	# Just check if the start address is in the trace logs
				507	covered = function_list[function_name]["start"] in address_seq
				508	# Find the source file
				509	files = os_command(("grep --include .c --include .s -nrw '{}' {}"
				510	"\| cut -d: -f1").format(function_name,
				511	self.local_workspace))
				512	unique_files = set(files.split())
				513	sources = []
				514	line_number = 0
				515	for source_file in unique_files:
				516	d = get_function_line_numbers(source_file)
				517	if function_name in d:
				518	line_number = d[function_name]
				519	sources.append(source_file)
				520	if len(sources) > 1:
				521	logger.warning("'{}' declared in {} files:{}".format(
				522	function_name, len(sources),
				523	", ".join(sources)))
				524	elif len(sources) == 1:
				525	source_file = remove_workspace(sources[0],
				526	self.local_workspace)
				527	if source_file not in self.source_files_coverage:
				528	self.source_files_coverage[source_file] = {"functions": {},
				529	"lines": {}}
				530	if function_name not in \
				531	self.source_files_coverage[source_file]["functions"] or \
				532	covered:
				533	self.source_files_coverage[source_file]["functions"][
				534	function_name] = {"covered": covered,
				535	"line_number": line_number}
				536	else:
				537	logger.warning("Function '{}' not found in sources.".format(
				538	function_name))
				539
				540
				541	json_conf_help = """
				542	Produces an intermediate json layer for code coverage reporting
				543	using an input json configuration file.
				544
				545	Input json configuration file format:
				546	{
				547	"configuration":
				548	{
				549	"remove_workspace": <true if 'workspace' must be from removed from the
				550	path of the source files>,
				551	"include_assembly": <true to include assembly source code in the
				552	intermediate layer>
				553	},
				554	"parameters":
				555	{
				556	"objdump": "<Path to the objdump binary to handle dwarf signatures>",
				557	"readelf: "<Path to the readelf binary to handle dwarf signatures>",
				558	"sources": [ <List of source code origins, one or more of the next
				559	options>
				560	{
				561	"type": "git",
				562	"URL": "<URL git repo>",
				563	"COMMIT": "<Commit id>",
				564	"REFSPEC": "<Refspec>",
				565	"LOCATION": "<Folder within 'workspace' where this source
				566	is located>"
				567	},
				568	{
				569	"type": "http",
				570	"URL": <URL link to file>",
				571	"COMPRESSION": "xz",
				572	"LOCATION": "<Folder within 'workspace' where this source
				573	is located>"
				574	}
				575	],
				576	"workspace": "<Workspace folder where the source code was located to
				577	produce the elf/axf files>",
				578	"output_file": "<Intermediate layer output file name and location>",
				579	"metadata": {<Metadata objects to be passed to the intermediate json
				580	files>}
				581	},
				582	"elfs": [ <List of elf files to be traced/parsed>
				583	{
				584	"name": "<Full path name to elf/axf file>",
				585	"traces": [ <List of trace files to be parsed for this
				586	elf/axf file>
				587	"Full path name to the trace file,"
				588	]
				589	}
				590	]
				591	}
				592	"""
				593	OBJDUMP = None
				594	READELF = None
				595	FUNCTION_LINES_ENABLED = None
				596	SOURCE_PATTERN = (r'(?s)([a-zA-Z0-0_]+)?(?::\n)?(^/.+?):([0-9]+)'
				597	r'(?: $.+?$)?\n(.+?)(?=\n/\|\n$\|([a-zA-Z0-0_]+:))')
				598
				599
				600	def main():
				601	global OBJDUMP
				602	global READELF
				603	global FUNCTION_LINES_ENABLED
				604
				605	parser = argparse.ArgumentParser(epilog=json_conf_help,
				606	formatter_class=RawTextHelpFormatter)
				607	parser.add_argument('--config-json', metavar='PATH',
				608	dest="config_json", default='config_file.json',
				609	help='JSON configuration file', required=True)
				610	parser.add_argument('--local-workspace', default="",
				611	help=('Local workspace folder where source code files'
				612	' and folders resides'))
				613	args = parser.parse_args()
				614	try:
				615	with open(args.config_json, 'r') as f:
				616	config = json.load(f)
				617	except Exception as ex:
				618	print("Error at opening and processing JSON: {}".format(ex))
				619	return
				620	# Setting toolchain binary tools variables
				621	OBJDUMP = config['parameters']['objdump']
				622	READELF = config['parameters']['readelf']
				623	# Checking if are installed
				624	os_command("{} --version".format(OBJDUMP))
				625	os_command("{} --version".format(READELF))
				626
				627	if args.local_workspace != "":
				628	# Checking ctags installed
				629	try:
				630	os_command("ctags --version")
				631	except BaseException:
				632	print("Warning!: ctags not installed/working function line numbers\
				633	will be set to 0. [{}]".format(
				634	"sudo apt install exuberant-ctags"))
				635	else:
				636	FUNCTION_LINES_ENABLED = True
				637
				638	pp = PostProcessCC(config, args.local_workspace)
				639	pp.process()
				640
				641
				642	if __name__ == '__main__':
				643	logging.basicConfig(filename='intermediate_layer.log', level=logging.DEBUG,
				644	format=('%(asctime)s %(levelname)s %(name)s '
				645	'%(message)s'))
				646	logger = logging.getLogger(__name__)
				647	start_time = time.time()
				648	main()
				649	elapsed_time = time.time() - start_time
				650	print("Elapsed time: {}s".format(elapsed_time))