#!/bin/bash

# Trace every command (-x) and abort on the first unhandled failure (-e).
set -xe

# Non-zero selects submission through SQUAD (additionally requires
# QA_SERVER_VERSION to be non-empty); 0 submits directly to LAVA.
USE_SQUAD=0
# Non-zero routes "fvp" jobs through TuxSuite; may be overridden from the
# environment, defaults to 0.
USE_TUXSUITE_FVP=${USE_TUXSUITE_FVP:-0}

# Get LAVA device type from a job file.
# Arguments: $1 - path to the job definition file
# Outputs:   the second whitespace-separated field of each line that starts
#            with "device_type:"
get_lava_device_type() {
    local job_file=$1
    # Quoted so that paths containing spaces or glob characters work.
    awk '/^device_type:/ {print $2}' "${job_file}"
}

# Write the TuxSuite client configuration to ~/.config/tuxsuite/config.ini.
# Globals: TUXSUITE_TOKEN (read) - stored as the "token" entry
setup_tuxsuite() {
    mkdir -p ~/.config/tuxsuite/
    # The file contains a secret token: create it readable by the owner only.
    # The umask change is confined to the subshell.
    (
        umask 077
        cat > ~/.config/tuxsuite/config.ini <<EOF
[default]
token=$TUXSUITE_TOKEN
group=tfc
project=ci
EOF
    )
    # Also tighten the mode when the file already existed with wider permissions.
    chmod 600 ~/.config/tuxsuite/config.ini
}

# Wait for the LAVA job to finish.
# By default, time out at 5400 secs (1.5 hours) and poll every 60 seconds.
# Arguments:
#   $1 - LAVA job ID
#   $2 - timeout budget in seconds (default 5400); only the sleep intervals
#        are counted against it, not the time spent querying LAVA
#   $3 - polling interval in seconds (default 60)
# Globals:
#   WORKSPACE (read) - the latest "lavacli jobs show" output is stored in
#                      ${WORKSPACE}/lava-progress.show
# Returns: 0 once the job state is "Finished", 1 on timeout
wait_lava_job() {
    # Keep the polling loop out of the trace output
    set +x
    local id=$1
    local timeout="${2:-5400}"
    local interval="${3:-60}"
    local t=$(( timeout ))

    while (( t > 0 )); do
        sleep "${interval}"
        # Only the state line is echoed as progress. A missing state line must
        # not abort the script under 'set -e', hence '|| true'.
        resilient_cmd lavacli jobs show "${id}" | tee "${WORKSPACE}/lava-progress.show" | grep 'state *:' || true
        set +x
        if grep 'state.*: Finished' "${WORKSPACE}/lava-progress.show"; then
            set -x
            cat "${WORKSPACE}/lava-progress.show"
            # finished
            return 0
        fi
        # Plain assignment instead of '((t -= interval))': the latter returns
        # status 1 when the result is 0, which under 'set -e' would terminate
        # the script right before the timeout is reported.
        t=$(( t - interval ))
    done
    set -x
    cat "${WORKSPACE}/lava-progress.show"
    echo "Timeout waiting for job to finish"
    # timeout
    return 1
}

# Run the command given as arguments; if it fails, retry with a pause between
# attempts until it succeeds or max_retries attempts have failed.
# The pause starts at 2 seconds and doubles after each failure, capped at 60.
# Arguments: $@ - command and its arguments
# Outputs:   the command's own output; diagnostics go to stderr
# Returns:   0 on success, 1 after max_retries failed attempts
# Note: tracing (set -x) is switched off while running and always switched
# back on before returning.
resilient_cmd() {
    set +x
    local max_retries=10
    local sleep_body=2
    local iter=0

    while true; do
        if "$@"; then
            break
        fi

        iter=$(( iter + 1 ))
        # Give up before sleeping: pausing after the last attempt is wasted time.
        if [ "${iter}" -ge "${max_retries}" ]; then
            echo "ERROR: Command '$*' failed ${iter} times in row" 1>&2
            set -x
            return 1
        fi

        sleep "${sleep_body}"
        # Exponential backoff
        sleep_body=$(( sleep_body * 2 ))
        if [ "${sleep_body}" -ge 60 ]; then
            sleep_body=60
            # iter - 1 is the zero-based index of the attempt that just failed
            echo "WARNING: Command '$*' still not successful on retry #$(( iter - 1 )), exp backoff already limited" 1>&2
        fi
    done
    set -x
    return 0
}

ls -l "${WORKSPACE}"

DEVICE=$(get_lava_device_type artefacts-lava/job.yaml)

# "fvp" jobs are run through TuxSuite when USE_TUXSUITE_FVP is non-zero; this
# branch always ends the script with the TuxSuite status.
if [[ "${DEVICE}" == "fvp" && "${USE_TUXSUITE_FVP}" -ne 0 ]]; then
    setup_tuxsuite
    # Needed so that the status of 'tuxsuite test submit' is not hidden by tee
    set -o pipefail
    if python3 -u -m tuxsuite test submit --device fvp-lava --job-definition artefacts-lava/job.yaml | tee tuxsuite-submit.out; then
        status=0
    else
        status=$?
        echo "TuxSuite test failed, status: ${status}"
    fi
    TUXID=$(awk '/^uid:/ {print $2}' tuxsuite-submit.out)
    echo "TuxSuite test ID: ${TUXID}"
    echo "${TUXID}" > "${WORKSPACE}/tux.id"
    tuxsuite test logs --raw "${TUXID}" > "${WORKSPACE}/lava-raw.log"

    # Collect the results first instead of piping straight into 'grep -q':
    # with pipefail, grep exiting on the first match can kill the producers
    # with SIGPIPE and make the whole pipeline look failed, which would hide
    # failing testcases. As before, a failure to fetch results is not fatal.
    results_json=$(tuxsuite test results --raw "${TUXID}" | python3 -m json.tool) || true
    if grep -q '"result": "fail"' <<<"${results_json}"; then
        echo "tuxsuite test submit status was: ${status}, failing testcases found, setting as 1 (failed)"
        status=1
    fi

    exit "${status}"
fi

# Submit artefacts-lava/job.yaml (through SQUAD, or directly to LAVA), wait for
# the LAVA job to finish and collect its raw log and results.
# Globals (read): USE_SQUAD, QA_SERVER, QA_SERVER_TEAM, QA_SERVER_PROJECT,
#                 QA_SERVER_VERSION, QA_REPORTS_TOKEN, DEVICE_TYPE,
#                 GERRIT_CHANGE_NUMBER, GERRIT_PATCHSET_NUMBER,
#                 LAVA_USER, LAVA_TOKEN, LAVA_SERVER, WORKSPACE
# Outputs: ${WORKSPACE}/lava-raw.log and ${WORKSPACE}/lava.results
#          (${WORKSPACE}/lava.log when monitoring times out)
# Returns: 0 if the results contain no "[fail]" entry, 1 otherwise.
# Exits the whole script with 1 (no retry by the caller) on a bad test
# definition, when no LAVA job ID could be obtained, or when waiting for the
# job times out.
# NOTE(review): LAVA_TOKEN and QA_REPORTS_TOKEN are passed on command lines and
# therefore show up in the 'set -x' trace - confirm the CI masks them.
function submit_via_lava_or_squad() {
    # Local, so that a retry never picks up a stale job ID from a previous call
    local TESTJOB_ID=""
    local LAVAJOB_ID=""
    local renumber iter max_tries response curl_status

    lavacli identities add --username "${LAVA_USER}" --token "${LAVA_TOKEN}" --uri "https://${LAVA_SERVER}/RPC2" default

    if [[ "${USE_SQUAD}" -ne 0 && -n "${QA_SERVER_VERSION}" ]]; then
        # Submit via SQUAD

        if [ -n "${GERRIT_CHANGE_NUMBER}" ] && [ -n "${GERRIT_PATCHSET_NUMBER}" ]; then
            curl \
                --fail \
                --retry 4 \
                -X POST \
                --header "Auth-Token: ${QA_REPORTS_TOKEN}" \
                "${QA_SERVER}/api/createbuild/${QA_SERVER_TEAM}/${QA_SERVER_PROJECT}/${QA_SERVER_VERSION}"
        fi

        # NOTE(review): DEVICE_TYPE is not set anywhere in this script (which
        # sets DEVICE) - presumably provided by the environment; confirm.
        curl_status=0
        TESTJOB_ID=$(curl \
            --fail \
            --retry 4 \
            -X POST \
            --header "Auth-Token: ${QA_REPORTS_TOKEN}" \
            --form "backend=${LAVA_SERVER}" \
            --form definition=@artefacts-lava/job.yaml \
            "${QA_SERVER}/api/submitjob/${QA_SERVER_TEAM}/${QA_SERVER_PROJECT}/${QA_SERVER_VERSION}/${DEVICE_TYPE}") || curl_status=$?

        # SQUAD will send 400, curl error code 22, on bad test definition
        if [ "${curl_status}" = "22" ]; then
            echo "Bad test definition!!"
            exit 1
        fi

        if [ -n "${TESTJOB_ID}" ]; then
            echo "TEST JOB URL: ${QA_SERVER}/testjob/${TESTJOB_ID} TEST JOB ID: ${TESTJOB_ID}"

            # The below loop with a sleep is intentional: LAVA could be under heavy load so previous job creation can
            # take 'some' time to get the right numeric LAVA JOB ID
            renumber='^[0-9]+$'
            LAVAJOB_ID="null"
            iter=0
            max_tries=120 # run retries for an hour
            while ! [[ ${LAVAJOB_ID} =~ ${renumber} ]]; do
                if [ "${iter}" -eq "${max_tries}" ]; then
                    LAVAJOB_ID=''
                    break
                fi
                sleep 30
                # URL quoted: it contains '?', which is a glob character
                response=$(curl --fail --retry 4 "${QA_SERVER}/api/testjobs/${TESTJOB_ID}/?fields=job_id")

                # Get the job_id value (whatever it is)
                LAVAJOB_ID=$(printf '%s\n' "${response}" | jq '.job_id')
                LAVAJOB_ID="${LAVAJOB_ID//\"/}"

                iter=$(( iter + 1 ))
            done
        fi
    else
        # Submit directly to LAVA
        LAVAJOB_ID=$(resilient_cmd lavacli jobs submit artefacts-lava/job.yaml)
    fi

    # check that rest query at least get non-empty value
    if [ -z "${LAVAJOB_ID}" ]; then
        echo "LAVA Job ID could not be obtained"
        exit 1
    fi

    echo "LAVA URL: https://${LAVA_SERVER}/scheduler/job/${LAVAJOB_ID} LAVA JOB ID: ${LAVAJOB_ID}"

    # if timeout on waiting for LAVA to complete, create an 'artificial' lava.log indicating
    # job ID and timeout seconds
    if ! wait_lava_job "${LAVAJOB_ID}"; then
        echo "Stopped monitoring LAVA JOB ${LAVAJOB_ID}, likely stuck or timeout too short?" | tee "${WORKSPACE}/lava.log"
        exit 1
    fi

    # Retrieve the test job plain log which is a yaml format file from LAVA.
    # The redirection has to be retried together with the command, hence
    # 'sh -c'; values are passed as positional parameters so that they are
    # never re-parsed by the inner shell.
    resilient_cmd sh -c 'lavacli jobs logs --raw "$1" > "$2"' sh "${LAVAJOB_ID}" "${WORKSPACE}/lava-raw.log"

    # Fetch and store LAVA job result (1 failure, 0 success)
    resilient_cmd lavacli results "${LAVAJOB_ID}" | tee "${WORKSPACE}/lava.results"
    if grep -q '\[fail\]' "${WORKSPACE}/lava.results"; then
        return 1
    fi
    return 0
}

# FIXME: Juno and FVP jobs may fail for reasons unrelated to the user's
# changes, so CI needs to resubmit the job, at most three times:
# Juno jobs may fail due to LAVA lab infrastructure issues (see
# https://projects.linaro.org/browse/LSS-2128)
# FVP jobs may hang at some particular TFTF test (see
# https://linaro.atlassian.net/browse/TFC-176)

# UPDATE: We want to keep retrying for LAVA for historical reasons,
# but we want to start with a clean slate for TuxSuite, so don't
# retry for it for now, and see how it goes.

# Stays 1 unless one of the submissions succeeds. The number of attempts
# comes from LAVA_RETRIES (default 3).
status=1
for (( i = 1; i <= ${LAVA_RETRIES:-3}; i++ )); do
    echo "# LAVA submission iteration #$i"
    if submit_via_lava_or_squad; then
        status=0
        break
    fi
done

exit "${status}"