#!/bin/bash

# Trace every command (-x) and abort on the first unhandled failure (-e).
set -xe

# Submission via SQUAD is switched off; jobs go straight to LAVA.
USE_SQUAD=0
# Set to a non-zero value in the environment to run FVP jobs through TuxSuite.
USE_TUXSUITE_FVP=${USE_TUXSUITE_FVP:-0}

# Get LAVA device type from a job file
# Arguments: $1 - path to the LAVA job definition file
# Outputs:   the second whitespace-separated field of every line that
#            starts with "device_type:", written to stdout
get_lava_device_type() {
    local job_file=$1
    # Quoted so that paths containing spaces or glob characters still work.
    awk '/^device_type:/ {print $2}' "${job_file}"
}

# Write the TuxSuite client configuration for the current user.
# Globals:   TUXSUITE_TOKEN, TUXSUITE_GROUP, TUXSUITE_PROJECT (read)
# Outputs:   creates/overwrites ~/.config/tuxsuite/config.ini
setup_tuxsuite() {
    mkdir -p ~/.config/tuxsuite/
    # The file holds an API token: create it with a restrictive umask (in a
    # subshell so the caller's umask is untouched) ...
    (
        umask 077
        cat > ~/.config/tuxsuite/config.ini <<EOF
[default]
token=$TUXSUITE_TOKEN
group=$TUXSUITE_GROUP
project=$TUXSUITE_PROJECT
EOF
    )
    # ... and tighten the mode explicitly in case the file already existed.
    chmod 600 ~/.config/tuxsuite/config.ini
}

# Wait for the LAVA job to finish
# By default, timeout at 5400 secs (1.5 hours) and monitor every 60 seconds
# Arguments: $1 - LAVA job id
#            $2 - timeout in seconds (default 5400)
#            $3 - polling interval in seconds (default 60)
# Globals:   WORKSPACE (read) - the latest "lavacli jobs show" output is
#            stored in ${WORKSPACE}/lava-progress.show
# Returns:   0 once the job state is "Finished", 1 on timeout
# Note:      command tracing (set -x) is enabled when the function returns.
wait_lava_job() {
    set +x
    local id=$1
    local timeout="${2:-5400}"
    local interval="${3:-60}"
    # Remaining time budget; local so it does not leak into the caller, and
    # assigned with $(( )) because a bare (( expr )) returns status 1 when the
    # value is 0, which would abort the script under 'set -e'.
    local t=$(( timeout ))

    while (( t > 0 )); do
        sleep "${interval}"
        # Show only the state line while polling; a missing state line is not
        # an error by itself, the check below decides.
        resilient_cmd lavacli jobs show "${id}" | tee "${WORKSPACE}/lava-progress.show" | grep 'state *:' || true
        # Keep tracing off for the rest of the polling loop.
        set +x
        if grep 'state.*: Finished' "${WORKSPACE}/lava-progress.show"; then
            set -x
            cat "${WORKSPACE}/lava-progress.show"
            # finished
            return 0
        fi
        t=$(( t - interval ))
    done
    set -x
    cat "${WORKSPACE}/lava-progress.show"
    echo "Timeout waiting for job to finish"
    # timeout
    return 1
}

# Run the given command passed through parameters, if fails, try
# at most more N-times with a pause of M-seconds until success.
# Arguments: "$@" - the command and its arguments
# Outputs:   the command's own output; warnings/errors go to stderr
# Returns:   0 as soon as one attempt succeeds, 1 after 10 failed attempts
# Note:      command tracing (set -x) is enabled when the function returns.
resilient_cmd() {
    set +x
    local max_retries=10
    local sleep_body=2
    local failures=0

    until "$@"; do
        failures=$(( failures + 1 ))
        # Give up right away after the last attempt instead of sleeping once
        # more (up to 60 seconds) before reporting the failure.
        if [ "${failures}" -ge "${max_retries}" ]; then
            echo "ERROR: Command '$*' failed ${failures} times in row" 1>&2
            set -x
            return 1
        fi

        sleep "${sleep_body}"
        # Exponential backoff
        sleep_body=$(( sleep_body * 2 ))
        if [ "${sleep_body}" -ge 60 ]; then
            sleep_body=60
            # Retries are numbered from 0 (the first attempt is retry #0).
            echo "WARNING: Command '$*' still not successful on retry #$(( failures - 1 )), exp backoff already limited" 1>&2
        fi
    done
    set -x
    return 0
}

ls -l "${WORKSPACE}"

DEVICE=$(get_lava_device_type artefacts-lava/job.yaml)

# FVP jobs are run through TuxSuite when USE_TUXSUITE_FVP is non-zero; this
# branch always ends the script with the TuxSuite result.
if [ "${DEVICE}" == "fvp" ] && [ "${USE_TUXSUITE_FVP}" -ne 0 ]; then
    setup_tuxsuite
    # Make "tuxsuite ... | tee" report the status of tuxsuite, not of tee.
    set -o pipefail
    # Stays a failure if the loop below does not run at all.
    status=1
    # The submission is attempted up to LAVA_RETRIES (default 3) times.
    for i in $(seq 1 "${LAVA_RETRIES:-3}"); do
        echo "# TuxSuite submission iteration #$i"
        if python3 -u -m tuxsuite test submit --device fvp-lava --job-definition artefacts-lava/job.yaml | tee tuxsuite-submit.out; then
            status=0
            break
        else
            status=$?
            echo "TuxSuite test failed, status: ${status}"
        fi
    done
    TUXID=$(awk '/^uid:/ {print $2}' tuxsuite-submit.out)
    echo "TuxSuite test ID: ${TUXID}"
    # Without a test ID neither logs nor results can be fetched.
    if [ -z "${TUXID}" ]; then
        echo "TuxSuite test ID could not be obtained"
        exit 1
    fi
    echo "${TUXID}" > "${WORKSPACE}/tux.id"
    tuxsuite test logs --raw "${TUXID}" > "${WORKSPACE}/lava-raw.log"

    # Fetch the results first and scan them afterwards: with pipefail active,
    # "producer | grep -q" can turn a match into a non-zero pipeline status
    # (grep -q exits early, the producer dies of SIGPIPE), and a failing
    # "tuxsuite test results" would silently count as "no failures".
    if ! tux_results=$(tuxsuite test results "${TUXID}"); then
        echo "tuxsuite test submit status was: ${status}, test results could not be retrieved, setting as 1 (failed)"
        status=1
    elif grep -v "lava.http-download" <<<"${tux_results}" | grep 'fail' > /dev/null; then
        echo "tuxsuite test submit status was: ${status}, failing testcases found, setting as 1 (failed)"
        status=1
    fi

    echo "TuxSuite test result: ${status}"

    exit "${status}"
fi

# Submit artefacts-lava/job.yaml (through SQUAD or directly to LAVA), wait
# for the job to finish and collect its log and results.
# Globals:   USE_SQUAD, LAVA_USER, LAVA_TOKEN, LAVA_SERVER, WORKSPACE (read);
#            QA_SERVER, QA_SERVER_TEAM, QA_SERVER_PROJECT, QA_SERVER_VERSION,
#            QA_REPORTS_TOKEN, GERRIT_CHANGE_NUMBER, GERRIT_PATCHSET_NUMBER,
#            DEVICE_TYPE (read, SQUAD path only);
#            TESTJOB_ID, LAVAJOB_ID (written)
# Outputs:   ${WORKSPACE}/lava-raw.log, ${WORKSPACE}/lava.results and, on
#            timeout, ${WORKSPACE}/lava.log
# Returns:   0 if the results contain no "[fail]" entry, 1 otherwise
# Exits:     the whole script with status 1 (so no retry happens) on a bad
#            test definition, when no LAVA job id can be obtained, or when
#            waiting for the job times out
submit_via_lava_or_squad() {
    local curl_status=0
    local response=""
    local renumber='^[0-9]+$'
    local iter=0
    local max_tries=120 # run retries for an hour

    # Forget the job id of a previous attempt, otherwise a retry that gets no
    # new id would wait on the old job again.
    LAVAJOB_ID=""

    # Tracing is switched off while a token is on the command line so that
    # it does not end up in the build log.
    set +x
    lavacli identities add --username "${LAVA_USER}" --token "${LAVA_TOKEN}" --uri "https://${LAVA_SERVER}/RPC2" default
    set -x

    if [ "${USE_SQUAD}" -ne 0 ] && [ -n "${QA_SERVER_VERSION}" ]; then
        # Submit via SQUAD

        if [ -n "${GERRIT_CHANGE_NUMBER}" ] && [ -n "${GERRIT_PATCHSET_NUMBER}" ]; then
            set +x
            curl \
                --fail \
                --retry 4 \
                -X POST \
                --header "Auth-Token: ${QA_REPORTS_TOKEN}" \
                "${QA_SERVER}/api/createbuild/${QA_SERVER_TEAM}/${QA_SERVER_PROJECT}/${QA_SERVER_VERSION}"
            set -x
        fi

        # NOTE(review): DEVICE_TYPE is not assigned anywhere in this script
        # (only DEVICE is) - presumably provided by the CI environment; confirm.
        set +x
        TESTJOB_ID=$(curl \
            --fail \
            --retry 4 \
            -X POST \
            --header "Auth-Token: ${QA_REPORTS_TOKEN}" \
            --form "backend=${LAVA_SERVER}" \
            --form definition=@artefacts-lava/job.yaml \
            "${QA_SERVER}/api/submitjob/${QA_SERVER_TEAM}/${QA_SERVER_PROJECT}/${QA_SERVER_VERSION}/${DEVICE_TYPE}") || curl_status=$?
        set -x

        # SQUAD will send 400, curl error code 22, on bad test definition
        if [ "${curl_status}" -eq 22 ]; then
            echo "Bad test definition!!"
            exit 1
        fi

        if [ -n "${TESTJOB_ID}" ]; then
            echo "TEST JOB URL: ${QA_SERVER}/testjob/${TESTJOB_ID} TEST JOB ID: ${TESTJOB_ID}"

            # The below loop with a sleep is intentional: LAVA could be under heavy load so previous job creation can
            # take 'some' time to get the right numeric LAVA JOB ID
            LAVAJOB_ID="null"
            while ! [[ ${LAVAJOB_ID} =~ ${renumber} ]]; do
                if [ "${iter}" -eq "${max_tries}" ]; then
                    LAVAJOB_ID=''
                    break
                fi
                sleep 30
                # Quoted: the URL contains '?', which the shell would
                # otherwise treat as a glob character.
                response=$(curl --fail --retry 4 "${QA_SERVER}/api/testjobs/${TESTJOB_ID}/?fields=job_id")

                # Get the job_id value (whatever it is), without JSON quotes
                LAVAJOB_ID=$(jq -r '.job_id' <<<"${response}")

                iter=$(( iter + 1 ))
            done
        fi
    else
        # Submit directly to LAVA
        LAVAJOB_ID=$(resilient_cmd lavacli jobs submit artefacts-lava/job.yaml)
    fi

    # check that rest query at least get non-empty value
    if [ -z "${LAVAJOB_ID}" ]; then
        echo "LAVA Job ID could not be obtained"
        exit 1
    fi

    echo "LAVA URL: https://${LAVA_SERVER}/scheduler/job/${LAVAJOB_ID} LAVA JOB ID: ${LAVAJOB_ID}"

    # if timeout on waiting for LAVA to complete, create an 'artificial' lava.log indicating
    # job ID and timeout seconds
    if ! wait_lava_job "${LAVAJOB_ID}"; then
        echo "Stopped monitoring LAVA JOB ${LAVAJOB_ID}, likely stuck or timeout too short?" | tee "${WORKSPACE}/lava.log"
        exit 1
    fi

    # Retrieve the test job plain log which is a yaml format file from LAVA.
    # The redirection has to be redone on every retry, hence the inner shell;
    # job id and target path are handed over as arguments rather than pasted
    # into the command string.
    resilient_cmd sh -c 'lavacli jobs logs --raw "$1" > "$2"' sh "${LAVAJOB_ID}" "${WORKSPACE}/lava-raw.log"

    # Fetch and store LAVA job result (1 failure, 0 success)
    resilient_cmd lavacli results "${LAVAJOB_ID}" | tee "${WORKSPACE}/lava.results"
    if grep -q '\[fail\]' "${WORKSPACE}/lava.results"; then
        return 1
    fi
    return 0
}

# FIXME: Juno and FVP jobs may fail for reasons unrelated to the user's
# changes, so CI needs to resubmit the job, at most three times by default
# (see LAVA_RETRIES):
#   Juno jobs may fail due to LAVA lab infrastructure issues (see
#   https://projects.linaro.org/browse/LSS-2128)
#   FVP jobs may hang at some particular TFTF test (see
#   https://linaro.atlassian.net/browse/TFC-176)

# UPDATE: We want to keep retrying for LAVA for historical reasons,
# but we want to start from a clean page with TuxSuite, so don't
# retry for it for now, and see how it goes.
# NOTE(review): the TuxSuite branch earlier in this script also loops up to
# LAVA_RETRIES times around its submission, so the "don't retry" statement
# above looks outdated - confirm.

# Try the LAVA/SQUAD submission up to LAVA_RETRIES times (default 3) and stop
# at the first attempt that succeeds; the script exits 0 only in that case.
status=1
max_attempts=${LAVA_RETRIES:-3}
for (( i = 1; i <= max_attempts; i++ )); do
    echo "# LAVA submission iteration #$i"
    if submit_via_lava_or_squad; then
        status=0
        break
    fi
done

exit "${status}"