#!/bin/bash
#
# Submit artefacts-lava/job.yaml to LAVA (directly, or through SQUAD), wait
# for the job to finish and report its result through the exit status.

# -x: trace every command; -e: abort on the first unhandled failure.
set -xe

# Non-zero selects submission through SQUAD (which additionally requires
# QA_SERVER_VERSION to be set); 0 submits straight to LAVA with lavacli.
USE_SQUAD=0

# Wait for the LAVA job to finish.
# By default, time out at 5400 secs (1.5 hours) and poll every 60 seconds.
#
# Globals:   WORKSPACE (read) - directory that receives lava-progress.show
# Arguments: $1 - LAVA job ID
#            $2 - timeout in seconds (default: 5400)
#            $3 - polling interval in seconds (default: 60)
# Outputs:   the job's 'state' line on every poll, then the complete
#            `lavacli jobs show` output once the wait is over
# Returns:   0 when the job reached the 'Finished' state, 1 on timeout
# Note:      xtrace is switched off while polling and switched on (set -x)
#            again before returning.
wait_lava_job() {
    set +x
    local id=$1
    local timeout="${2:-5400}"
    local interval="${3:-60}"
    # Plain assignments instead of `(( t = ... ))` / `(( t -= ... ))`: an
    # arithmetic command returns status 1 when its value is 0, which would
    # abort the script under `set -e` exactly when the countdown hits zero.
    local t=$(( timeout ))

    while (( t > 0 )); do
        sleep "${interval}"
        # Progress display only; a missing state line must not end the wait.
        resilient_cmd lavacli jobs show "${id}" \
            | tee "${WORKSPACE}/lava-progress.show" \
            | grep 'state *:' || true
        # Keep tracing off for the remainder of the poll.
        set +x
        if grep 'state.*: Finished' "${WORKSPACE}/lava-progress.show"; then
            set -x
            cat "${WORKSPACE}/lava-progress.show"
            # finished
            return 0
        fi
        t=$(( t - interval ))
    done

    set -x
    cat "${WORKSPACE}/lava-progress.show"
    echo "Timeout waiting for job to finish"
    # timeout
    return 1
}

# Run the command given as parameters; if it fails, retry it with an
# exponentially growing pause (2, 4, 8, ... capped at 60 seconds) until it
# succeeds or has failed max_retries (10) times in a row.
#
# Arguments: $@ - command and its arguments
# Outputs:   whatever the command prints; WARNING/ERROR notes on stderr
# Returns:   0 as soon as the command succeeds, 1 when every attempt failed
# Note:      xtrace is switched off while retrying and switched on (set -x)
#            again before returning.
resilient_cmd() {
    set +x
    local max_retries=10
    local sleep_body=2
    local iter=0

    while true; do
        if "$@"; then
            break
        fi

        iter=$(( iter + 1 ))
        # Give up right after the last failed attempt; pausing first would
        # only delay the error by up to a minute.
        if [ "${iter}" -ge "${max_retries}" ]; then
            echo "ERROR: Command '$*' failed ${iter} times in row" 1>&2
            set -x
            return 1
        fi

        sleep "${sleep_body}"
        # Exponential backoff
        sleep_body=$(( sleep_body * 2 ))
        if [ "${sleep_body}" -ge 60 ]; then
            sleep_body=60
            # Retry numbering is zero-based: #0 is the initial attempt.
            echo "WARNING: Command '$*' still not successful on retry #$(( iter - 1 )), exp backoff already limited" 1>&2
        fi
    done
    set -x
    return 0
}

# Show the workspace content in the build log; falls back to the current
# directory when WORKSPACE is unset or empty.
ls -l -- "${WORKSPACE:-.}"

# Submit artefacts-lava/job.yaml (through SQUAD when USE_SQUAD is non-zero
# and QA_SERVER_VERSION is set, otherwise directly with lavacli), wait for
# the LAVA job to finish and fetch its log and results.
#
# Globals:   USE_SQUAD, LAVA_USER, LAVA_TOKEN, LAVA_SERVER, WORKSPACE (read)
#            QA_SERVER, QA_SERVER_TEAM, QA_SERVER_PROJECT, QA_SERVER_VERSION,
#            QA_REPORTS_TOKEN, DEVICE_TYPE, GERRIT_CHANGE_NUMBER,
#            GERRIT_PATCHSET_NUMBER (read, SQUAD path only)
#            TESTJOB_ID, LAVAJOB_ID (written)
# Outputs:   ${WORKSPACE}/lava-raw.log and ${WORKSPACE}/lava.results, or
#            ${WORKSPACE}/lava.log when monitoring the job timed out
# Returns:   0 when the results contain no '[fail]' entry, 1 otherwise
# Exits:     the whole script with status 1 when SQUAD rejects the job
#            definition, no LAVA job ID could be obtained, or waiting for
#            the job timed out
submit_via_lava_or_squad() {
    local renumber='^[0-9]+$'
    local iter=0
    local max_tries=120 # run retries for an hour
    local curl_rc=0

    # Start every attempt from a clean slate so a resubmission can never
    # pick up the job IDs of a previous attempt.
    TESTJOB_ID=''
    LAVAJOB_ID=''

    # Keep the LAVA token out of the xtrace log.
    set +x
    lavacli identities add --username "${LAVA_USER}" --token "${LAVA_TOKEN}" \
        --uri "https://${LAVA_SERVER}/RPC2" default
    set -x

    if [[ "${USE_SQUAD}" -ne 0 && -n "${QA_SERVER_VERSION}" ]]; then
        # Submit via SQUAD

        # Keep the SQUAD token out of the xtrace log.
        set +x
        if [[ -n "${GERRIT_CHANGE_NUMBER}" && -n "${GERRIT_PATCHSET_NUMBER}" ]]; then
            curl \
                --fail \
                --retry 4 \
                -X POST \
                --header "Auth-Token: ${QA_REPORTS_TOKEN}" \
                "${QA_SERVER}/api/createbuild/${QA_SERVER_TEAM}/${QA_SERVER_PROJECT}/${QA_SERVER_VERSION}"
        fi

        # Capture curl's status right at the assignment so that no later
        # command can overwrite it before it is inspected.
        TESTJOB_ID=$(curl \
            --fail \
            --retry 4 \
            -X POST \
            --header "Auth-Token: ${QA_REPORTS_TOKEN}" \
            --form "backend=${LAVA_SERVER}" \
            --form definition=@artefacts-lava/job.yaml \
            "${QA_SERVER}/api/submitjob/${QA_SERVER_TEAM}/${QA_SERVER_PROJECT}/${QA_SERVER_VERSION}/${DEVICE_TYPE}") \
            || curl_rc=$?
        set -x

        # SQUAD will send 400, curl error code 22, on bad test definition
        if [[ "${curl_rc}" -eq 22 ]]; then
            echo "Bad test definition!!"
            exit 1
        fi

        if [[ -n "${TESTJOB_ID}" ]]; then
            echo "TEST JOB URL: ${QA_SERVER}/testjob/${TESTJOB_ID} TEST JOB ID: ${TESTJOB_ID}"

            # The below loop with a sleep is intentional: LAVA could be under
            # heavy load so previous job creation can take 'some' time to get
            # the right numeric LAVA JOB ID
            LAVAJOB_ID="null"
            while ! [[ "${LAVAJOB_ID}" =~ ${renumber} ]]; do
                if [[ "${iter}" -eq "${max_tries}" ]]; then
                    LAVAJOB_ID=''
                    break
                fi
                sleep 30
                LAVAJOB_ID=$(curl --fail --retry 4 \
                    "${QA_SERVER}/api/testjobs/${TESTJOB_ID}/?fields=job_id")

                # Get the job_id value (whatever it is), without JSON quotes
                LAVAJOB_ID=$(printf '%s\n' "${LAVAJOB_ID}" | jq -r '.job_id')

                iter=$(( iter + 1 ))
            done
        fi
    else
        # Submit directly to LAVA
        LAVAJOB_ID=$(resilient_cmd lavacli jobs submit artefacts-lava/job.yaml)
    fi

    # check that the submission at least produced a non-empty value
    if [[ -z "${LAVAJOB_ID}" ]]; then
        echo "LAVA Job ID could not be obtained"
        exit 1
    fi

    echo "LAVA URL: https://${LAVA_SERVER}/scheduler/job/${LAVAJOB_ID} LAVA JOB ID: ${LAVAJOB_ID}"

    # if timeout on waiting for LAVA to complete, create an 'artificial'
    # lava.log indicating the job ID
    if ! wait_lava_job "${LAVAJOB_ID}"; then
        echo "Stopped monitoring LAVA JOB ${LAVAJOB_ID}, likely stuck or timeout too short?" | tee "${WORKSPACE}/lava.log"
        exit 1
    fi

    # Retrieve the test job plain log which is a yaml format file from LAVA.
    # Job ID and target path are handed over as positional parameters so the
    # inner shell never re-parses their content.
    resilient_cmd sh -c 'lavacli jobs logs --raw "$1" > "$2"' sh \
        "${LAVAJOB_ID}" "${WORKSPACE}/lava-raw.log"

    # Fetch and store LAVA job result (1 failure, 0 success)
    resilient_cmd lavacli results "${LAVAJOB_ID}" | tee "${WORKSPACE}/lava.results"
    if grep -q '\[fail\]' "${WORKSPACE}/lava.results"; then
        return 1
    fi
    return 0
}

# FIXME: Juno and FVP jobs may fail for reasons unrelated to the user's
# changes, so CI needs to resubmit the job, at most three times:
# Juno jobs may fail due to LAVA lab infrastructure issues (see
# https://projects.linaro.org/browse/LSS-2128)
# FVP jobs may hang at some particular TFTF test (see
# https://linaro.atlassian.net/browse/TFC-176)

# UPDATE: We want to keep retrying for LAVA for historical reasons,
# but we want to start from a clean slate with TuxSuite, so don't
# retry for it for now, and see how it goes.

# Resubmit until one attempt passes; LAVA_RETRIES (default: 3) caps the
# number of attempts. The script exits 0 on the first passing attempt and
# 1 when every attempt failed.
status=1
for (( i = 1; i <= ${LAVA_RETRIES:-3}; i++ )); do
    echo "# LAVA submission iteration #$i"
    if submit_via_lava_or_squad; then
        status=0
        break
    fi
done

exit "${status}"