blob: 8b0cdea30880add7b954aa33bfaba6af86eafcf6 [file] [log] [blame]
Matthew Hartfb6fd362020-03-04 21:03:59 +00001#!/usr/bin/env python3
2
3from __future__ import print_function
4
5__copyright__ = """
6/*
Xinyu Zhangaf63f902023-01-05 15:09:28 +08007 * Copyright (c) 2020-2023, Arm Limited. All rights reserved.
Matthew Hartfb6fd362020-03-04 21:03:59 +00008 *
9 * SPDX-License-Identifier: BSD-3-Clause
10 *
11 */
12 """
13
14"""
Paul Sokolovsky65ed4392024-03-25 11:50:58 +070015Script for waiting for device test backend (LAVA/Tux) jobs and parsing the results
Matthew Hartfb6fd362020-03-04 21:03:59 +000016"""
17
18import os
Matthew Hartfb6fd362020-03-04 21:03:59 +000019import time
20import yaml
21import argparse
Xinyu Zhangc8a670c2021-05-18 20:20:53 +080022import shutil
Paul Sokolovskya95abd92022-12-27 13:48:11 +030023import logging
Jianliang Shen418051d2023-08-21 12:01:11 +080024import json
Paul Sokolovsky7fd1bc52023-01-11 20:14:37 +030025from xmlrpc.client import ProtocolError
Matthew Hartfb6fd362020-03-04 21:03:59 +000026from jinja2 import Environment, FileSystemLoader
Paul Sokolovsky02c469e2024-03-15 22:08:37 +070027from lava_helper import test_lava_dispatch_credentials, LAVA_RPC_connector
Xinyu Zhangc918b6e2022-10-08 17:13:17 +080028from lava_submit_jobs import submit_lava_jobs
Paul Sokolovsky2512ec52022-03-04 00:15:39 +030029import codecov_helper
30
Matthew Hartfb6fd362020-03-04 21:03:59 +000031
Paul Sokolovskya95abd92022-12-27 13:48:11 +030032_log = logging.getLogger("lavaci")
33
34
def wait_for_jobs(user_args):
    """Wait for the requested jobs, resubmitting the unsuccessful ones once.

    The comma-separated ``user_args.job_ids`` are waited for, then any job
    that did not finish cleanly is handed to ``resubmit_failed_jobs`` and the
    resubmitted jobs are waited for as well.

    Returns the dict of finished jobs (job id -> info), including the
    results of resubmitted jobs.
    """
    requested_ids = user_args.job_ids.split(",")
    lava = test_lava_dispatch_credentials(user_args)
    finished_jobs = get_finished_jobs(requested_ids, user_args, lava)

    retried_ids = resubmit_failed_jobs(finished_jobs, user_args)
    if not retried_ids:
        return finished_jobs

    _log.info("Waiting for resubmitted jobs: %s", retried_ids)
    finished_jobs.update(get_finished_jobs(retried_ids, user_args, lava))
    return finished_jobs
45
def process_finished_jobs(finished_jobs, user_args):
    """Run every reporting step over the finished jobs, in a fixed order.

    The order matters: ``test_report`` stores ``info['result']``, which
    ``job_links`` reads afterwards.
    """
    reporting_steps = (
        print_lava_urls,
        test_report,
        job_links,
        codecov_helper.coverage_reports,
    )
    for step in reporting_steps:
        step(finished_jobs, user_args)
Xinyu Zhangf2b7cbf2021-05-18 20:17:34 +080051
def get_finished_jobs(job_list, user_args, lava):
    """Block until the given jobs finish (or time out) and fetch their artifacts.

    Jobs that are still unfinished once ``lava.block_wait_for_jobs`` returns
    are cancelled. When ``user_args.artifacts_path`` is set, artifacts are
    fetched from the wait callback; jobs whose inline fetch failed (or never
    happened) get a second attempt afterwards.

    Returns the dict of finished jobs as produced by
    ``lava.block_wait_for_jobs``, with ``info['job_dir']`` filled in when an
    artifacts path is configured.
    """
    _log.info("Waiting for %d LAVA/Tux jobs", len(job_list))

    # Ids whose artifacts were successfully downloaded from the callback.
    already_fetched = set()

    def inline_fetch_artifacts(job_id, info):
        # Passed as ``callback`` to block_wait_for_jobs. Any failure here is
        # logged and swallowed so that waiting can go on; the job is simply
        # left out of ``already_fetched`` and retried after the wait.
        try:
            if user_args.artifacts_path:
                dir_name = "{}_{}".format(job_id, info['description'])
                info['job_dir'] = os.path.join(user_args.artifacts_path, dir_name)
                fetch_artifacts_for_job(job_id, info, user_args, lava)
                already_fetched.add(job_id)
        except Exception:
            _log.exception("")
            _log.warning("Failed to fetch artifacts for job %s inline, will retry later", job_id)

    # NOTE(review): the positional 5 is presumably a polling interval - confirm
    # against LAVA_RPC_connector.block_wait_for_jobs.
    finished_jobs = lava.block_wait_for_jobs(
        job_list, user_args.dispatch_timeout, 5, callback=inline_fetch_artifacts
    )

    timed_out = [job for job in job_list if job not in finished_jobs]
    for job in timed_out:
        _log.info("Cancelling unfinished job {} because of timeout.".format(job))
        lava.cancel_job(job)
    if timed_out:
        _log.info("Job fails because some test jobs have been cancelled.")

    if not user_args.artifacts_path:
        return finished_jobs

    for job, info in finished_jobs.items():
        info['job_dir'] = os.path.join(
            user_args.artifacts_path, "{}_{}".format(str(job), info['description'])
        )

    to_fetch = {}
    for job_id, info in finished_jobs.items():
        if job_id not in already_fetched:
            to_fetch[job_id] = info

    if to_fetch:
        _log.info("Fetching artifacts for remaining jobs: {}".format(to_fetch.keys()))
        try:
            fetch_artifacts(to_fetch, user_args, lava)
        except Exception:
            _log.exception("Still failed to fetch artifacts for some jobs; continuing, but overall result is failure")
    return finished_jobs
Matthew Hartfb6fd362020-03-04 21:03:59 +000085
def resubmit_failed_jobs(jobs, user_args):
    """Resubmit every job that did not end as Finished/Complete.

    For each unsuccessful job the fetched ``definition.yaml`` is moved into
    the ``failed_jobs`` directory, the job's artifact directory is deleted
    and the job is removed from ``jobs`` (the dict is modified in place).
    The collected definitions are then submitted via ``submit_lava_jobs``.

    A failed job whose definition was never fetched (no ``job_dir`` or no
    ``definition.yaml`` on disk) cannot be resubmitted; it is left in
    ``jobs`` with its failing state instead of aborting the whole run.

    Returns whatever ``submit_lava_jobs`` returns for the resubmitted
    definitions, or an empty list when there was nothing to resubmit.
    """
    if not jobs:
        return []
    time.sleep(2)  # be friendly to LAVA

    resubmit_dir = 'failed_jobs'
    os.makedirs(resubmit_dir, exist_ok=True)

    failed_ids = []
    for job_id, info in jobs.items():
        if info['health'] == "Complete" and info['state'] == "Finished":
            continue

        job_dir = info.get('job_dir')
        def_path = os.path.join(job_dir, 'definition.yaml') if job_dir else None
        if def_path is None or not os.path.isfile(def_path):
            _log.warning(
                "Cannot resubmit job %s (state: %s, health: %s): job definition was not fetched",
                job_id, info["state"], info["health"]
            )
            continue

        _log.warning(
            "Will resubmit job %s because of its state: %s, health: %s",
            job_id, info["state"], info["health"]
        )
        # shutil.move also works when the artifacts directory lives on a
        # different filesystem than the working directory (os.rename does not).
        shutil.move(def_path, os.path.join(resubmit_dir, '{}_definition.yaml'.format(job_id)))
        shutil.rmtree(job_dir)
        failed_ids.append(job_id)

    if not failed_ids:
        # Nothing was queued, so skip the submission call entirely.
        return []

    # Removal is deferred until after the loop: the dict must not change size
    # while it is being iterated.
    for failed_id in failed_ids:
        jobs.pop(failed_id)
    return submit_lava_jobs(user_args, job_dir=resubmit_dir)
107
Paul Sokolovskye3955162024-03-25 13:07:58 +0700108
def fetch_artifacts_for_job(job_id, info, user_args, lava):
    """Download definition, log, config and results of one job into its job_dir.

    Does nothing when ``user_args.artifacts_path`` is not set. The download
    sequence is attempted up to three times; after a ProtocolError, IOError
    or YAML error it sleeps 3s, then 6s, before retrying, and the third
    failure is re-raised. ``info['metadata']`` (and ``info['device_type']``
    if absent) are filled from the job definition. On success, trace data is
    extracted from the target log via ``codecov_helper``.
    """
    if not user_args.artifacts_path:
        return

    job_dir = info['job_dir']
    started = time.time()

    def_path = os.path.join(job_dir, 'definition.yaml')
    target_log = os.path.join(job_dir, 'target_log.txt')
    config = os.path.join(job_dir, 'config.tar.bz2')
    results_file = os.path.join(job_dir, 'results.yaml')

    backoff = 3
    attempts_left = 3
    while True:
        try:
            os.makedirs(job_dir, exist_ok=True)
            definition = lava.get_job_definition(job_id, info, def_path)
            info['metadata'] = definition.get('metadata', {})
            # Fill in device type for Tux jobs
            if 'device_type' not in info:
                info['device_type'] = definition.get('device_type')
            time.sleep(0.2)  # be friendly to LAVA
            lava.get_job_log(job_id, target_log)
            time.sleep(0.2)
            lava.get_job_config(job_id, config)
            time.sleep(0.2)
            lava.get_job_results(job_id, info, results_file)
            break
        except (ProtocolError, IOError, yaml.error.YAMLError) as e:
            attempts_left -= 1
            if attempts_left == 0:
                raise
            _log.warning("fetch_artifacts(%s): Error %r occurred, retrying", job_id, e)
            time.sleep(backoff)
            backoff *= 2

    _log.info("Fetched artifacts for job %s in %ds", job_id, time.time() - started)
    codecov_helper.extract_trace_data(target_log, job_dir)
145
146
def fetch_artifacts(jobs, user_args, lava):
    """Fetch artifacts for every job in ``jobs`` (job id -> info).

    A no-op when ``user_args.artifacts_path`` is not set. Exceptions raised
    for one job propagate and stop the remaining downloads.
    """
    if user_args.artifacts_path:
        for job_id in jobs:
            fetch_artifacts_for_job(job_id, jobs[job_id], user_args, lava)
Paul Sokolovskyce546192023-01-03 21:28:08 +0300153
Matthew Hartfb6fd362020-03-04 21:03:59 +0000154
def lava_id_to_url(id, user_args):
    """Return the web/API URL for a job id.

    Ids recognised by ``LAVA_RPC_connector.is_tux_id`` map to a TuxSuite API
    URL built from the TUXSUITE_GROUP and TUXSUITE_PROJECT environment
    variables; every other id maps to a scheduler URL under
    ``user_args.lava_url``.
    """
    if not LAVA_RPC_connector.is_tux_id(id):
        return "{}/scheduler/job/{}".format(user_args.lava_url, id)

    group = os.environ.get("TUXSUITE_GROUP")
    project = os.environ.get("TUXSUITE_PROJECT")
    return "https://tuxapi.tuxsuite.com/v1/groups/{}/projects/{}/tests/{}".format(group, project, id)
Matthew Hartfb6fd362020-03-04 21:03:59 +0000162
def job_links(jobs, user_args):
    """Print a plain-text summary (config, result, links) for every job.

    Expects ``info['result']``, ``info['job_dir']`` and ``info['metadata']``
    to be populated already. When the JOB_NAME environment variable is
    'tf-m-nightly-performance', each job's build name and job directory are
    also appended to ``performance_config.txt`` under SHARE_FOLDER.
    """
    sections = []
    for job, info in jobs.items():
        metadata = info['metadata']
        lines = [
            "\nLAVA Test Config:\n",
            "Config Name: {}\n".format(metadata['build_name']),
            "Test Result: {}\n".format(info['result']),
            "Device Type: {}\n".format(metadata['device_type']),
            "Build link: {}\n".format(metadata['build_job_url']),
            "LAVA link: {}\n".format(lava_id_to_url(job, user_args)),
            "TFM LOG: {}artifact/{}/target_log.txt\n".format(os.getenv("BUILD_URL"), info['job_dir']),
        ]
        sections.extend(lines)

        # Save job information to share folder.
        if os.getenv('JOB_NAME') == 'tf-m-nightly-performance':
            perf_list = os.path.join(os.getenv('SHARE_FOLDER'), 'performance_config.txt')
            with open(perf_list, 'a') as f:
                f.write(metadata['build_name'] + ' ' + info['job_dir'] + '\n')

    print("".join(sections))
180
def remove_lava_dupes(results):
    """Drop non-passing 'lava' suite entries shadowed by a passing namesake.

    An entry is removed when its ``result`` is not 'pass', its ``suite`` is
    "lava", and some entry in ``results`` with the same ``name`` has
    ``result == 'pass'``. All other entries keep their relative order.

    The list is filtered in place (callers holding a reference see the
    change) and is also returned.
    """
    # Collect the passing names up front. Removing items from the list while
    # looping over it skips the element after each removal, and removing the
    # same entry once per passing namesake can raise ValueError.
    passed_names = {entry['name'] for entry in results if entry['result'] == 'pass'}

    def is_shadowed(entry):
        return (
            entry['result'] != 'pass'
            and entry['suite'] == "lava"
            and entry['name'] in passed_names
        )

    results[:] = [entry for entry in results if not is_shadowed(entry)]
    return results
190
def test_report(jobs, user_args):
    """Set ``info['result']`` for every job and render the summary reports.

    A job is marked 'FAILURE' when its health is not 'Complete', when its
    ``results.yaml`` is missing, empty or contains no YAML document, or when
    any retained result entry has ``result == 'fail'``; otherwise it is
    'SUCCESS'. Only jobs with a readable results file are passed to the
    templates, together with their non-"lava"-suite results (plus the
    'lava-test-monitor' entry). ``info['lava_url']`` and
    ``info['artifacts_dir']`` are filled in for those jobs.
    """
    # parsing of test results is WIP
    jinja_data = []
    for job, info in jobs.items():
        info['result'] = 'SUCCESS'
        if info['health'] != 'Complete':
            info['result'] = 'FAILURE'
            continue

        results_file = os.path.join(info['job_dir'], 'results.yaml')
        if not os.path.exists(results_file) or os.path.getsize(results_file) == 0:
            info['result'] = 'FAILURE'
            continue

        with open(results_file, "r") as results_fh:
            results = yaml.safe_load(results_fh)
        if results is None:
            # A non-empty file holding only whitespace or comments parses to
            # None; treat it like an empty results file instead of crashing
            # on iteration.
            info['result'] = 'FAILURE'
            continue

        non_lava_results = [x for x in results if x['suite'] != 'lava' or x['name'] == 'lava-test-monitor']
        info['lava_url'] = lava_id_to_url(job, user_args)
        info['artifacts_dir'] = info['job_dir']
        jinja_data.append({job: [info, non_lava_results]})

        if any(result['result'] == 'fail' for result in non_lava_results):
            info['result'] = 'FAILURE'

        # NOTE(review): nothing visible in this loop talks to LAVA any more;
        # the delay is kept to preserve timing - confirm it can be dropped.
        time.sleep(0.5)  # be friendly to LAVA

    render_jinja({'jobs': jinja_data})
221
def render_jinja(data):
    """Render the HTML and CSV test summaries into the current directory.

    Templates are loaded from the ``jinja2_templates`` directory next to this
    script. Both templates are rendered before either output file is
    written, so a rendering error leaves no partial output behind.
    """
    script_dir = os.path.abspath(os.path.dirname(__file__))
    templates_dir = os.path.join(script_dir, "jinja2_templates")
    env = Environment(loader=FileSystemLoader(searchpath=templates_dir))

    outputs = (
        ("test_summary.jinja2", 'test_summary.html'),
        ("test_summary_csv.jinja2", 'test_summary.csv'),
    )
    rendered = [
        (out_name, env.get_template(template_name).render(data))
        for template_name, out_name in outputs
    ]
    for out_name, text in rendered:
        with open(out_name, "w") as out_file:
            out_file.write(text)
232
def print_lava_urls(jobs, user_args):
    """Report the URLs of all given jobs through ``info_print``.

    ``info_print`` is called with its default arguments here.
    """
    urls = []
    for job_id in jobs:
        urls.append(lava_id_to_url(job_id, user_args))
    info_print("LAVA/Tux jobs triggered for this build: {}".format(urls))
Matthew Hartfb6fd362020-03-04 21:03:59 +0000236
237
def info_print(line, silent=True):
    """Print ``line`` with an "INFO: " prefix unless ``silent`` is truthy.

    ``silent`` defaults to True, so a call without it prints nothing.
    """
    if silent:
        return
    print("INFO: {}".format(line))
Matthew Hartfb6fd362020-03-04 21:03:59 +0000241
# Number of times main() attempts wait_for_jobs() before giving up.
# WARNING: raising this above 1 is a last-resort, temporary stop-gap only;
# it will overload LAVA and jeopardize the stability of the entire TF CI.
INEFFICIENT_RETRIES = 1
245
246
def main(user_args):
    """Wait for the requested jobs, report on them, and fail if any is missing.

    ``wait_for_jobs`` is attempted up to INEFFICIENT_RETRIES times; an
    exception on the last attempt is re-raised. After the reports have been
    produced, an Exception is raised when fewer jobs finished than were
    requested in ``user_args.job_ids``.
    """
    final_try = INEFFICIENT_RETRIES - 1
    for try_time in range(INEFFICIENT_RETRIES):
        try:
            finished_jobs = wait_for_jobs(user_args)
        except Exception as e:
            if try_time >= final_try:
                raise e
            _log.exception("Exception in wait_for_jobs")
            _log.info("Will try to get LAVA jobs again, this was try: %d", try_time)
        else:
            break

    process_finished_jobs(finished_jobs, user_args)

    requested_count = len(user_args.job_ids.split(","))
    if len(finished_jobs) < requested_count:
        raise Exception("Some LAVA jobs cancelled.")
Matthew Hartfb6fd362020-03-04 21:03:59 +0000262
def get_cmd_args():
    """Build the command-line parser and return the parsed ``sys.argv`` options.

    ``--job-ids`` is the only required option; ``--lava-timeout`` defaults to
    3600 and ``--use-env`` to False.
    """
    parser = argparse.ArgumentParser(description="Lava Wait Jobs")
    group = parser.add_argument_group("Lava Wait Jobs")

    # One (flag, add_argument keyword arguments) entry per option.
    option_table = (
        ("--lava-url", dict(
            dest="lava_url", action="store",
            help="LAVA lab URL (without RPC2)")),
        ("--job-ids", dict(
            dest="job_ids", action="store", required=True,
            help="Comma separated list of job IDS")),
        ("--lava-token", dict(
            dest="lava_token", action="store",
            help="LAVA auth token")),
        ("--lava-user", dict(
            dest="lava_user", action="store",
            help="LAVA username")),
        ("--use-env", dict(
            dest="token_from_env", action="store_true", default=False,
            help="Use LAVA auth info from environment")),
        ("--lava-timeout", dict(
            dest="dispatch_timeout", action="store", type=int, default=3600,
            help="Time in seconds to wait for all jobs")),
        ("--artifacts-path", dict(
            dest="artifacts_path", action="store",
            help="Download LAVA artifacts to this directory")),
    )
    for flag, options in option_table:
        group.add_argument(flag, **options)

    return parser.parse_args()
293
294
if __name__ == "__main__":
    # Script entry point: enable INFO-level logging, then parse the command
    # line and run the main logic.
    logging.basicConfig(level=logging.INFO)
    cli_args = get_cmd_args()
    main(cli_args)
Matthew Hartfb6fd362020-03-04 21:03:59 +0000297 main(get_cmd_args())