#!/bin/bash

# Trace every command and abort on the first unhandled failure.
set -o xtrace -o errexit

# Switch consulted further down when the device type is "fvp": a non-zero
# value selects TuxSuite. Defaults to 0 when the caller leaves it unset or
# empty.
: "${USE_TUXSUITE_FVP:=0}"
| 6 | |
# Get LAVA device type from a job file.
# Arguments: $1 - path to the LAVA job definition
# Outputs:   for every line starting with "device_type:", its second
#            whitespace-separated field, on stdout
get_lava_device_type() {
    local job_file=$1
    # Quoted so that a path containing spaces or glob characters reaches awk
    # as one single argument.
    awk '/^device_type:/ {print $2}' "${job_file}"
}
| 12 | |
# Write the TuxSuite client configuration for the current user.
# Globals:  TUXSUITE_TOKEN, TUXSUITE_GROUP, TUXSUITE_PROJECT (read)
# Outputs:  (re)creates ~/.config/tuxsuite/config.ini, readable and writable
#           by the owner only
setup_tuxsuite() {
    local config_dir=~/.config/tuxsuite
    local config_file="${config_dir}/config.ini"

    mkdir -p "${config_dir}"
    # The file holds an API token: restrict its permissions *before* the
    # secret is written, so it is never readable by other users.
    touch "${config_file}"
    chmod 600 "${config_file}"
    cat > "${config_file}" <<EOF
[default]
token=$TUXSUITE_TOKEN
group=$TUXSUITE_GROUP
project=$TUXSUITE_PROJECT
EOF
}
Paul Sokolovsky | 35c231a | 2023-05-12 22:53:23 +0300 | [diff] [blame] | 22 | |
# Wait for a LAVA job to finish.
# By default, time out after 5400 secs (1.5 hours) and poll every 60 seconds.
# Globals:   WORKSPACE (read) - directory receiving lava-progress.show
# Arguments: $1 - LAVA job ID
#            $2 - timeout in seconds (default 5400)
#            $3 - polling interval in seconds (default 60)
# Outputs:   the job "state" line on every poll; the last `lavacli jobs show`
#            output once the job is finished or the wait timed out
# Returns:   0 when the job reached the Finished state, 1 on timeout
# Note:      shell tracing is switched off while polling and is left switched
#            on when the function returns.
wait_lava_job() {
    set +x
    local id=$1
    local timeout="${2:-5400}"
    local interval="${3:-60}"
    local progress_file="${WORKSPACE}/lava-progress.show"
    # Local countdown: it must not clobber a variable of the caller.
    local remaining=$(( timeout ))

    while (( remaining > 0 )); do
        sleep "${interval}"
        # A poll showing no "state" line is not an error; `|| true` keeps
        # grep's exit status from aborting the script under errexit.
        resilient_cmd lavacli jobs show "${id}" \
            | tee "${progress_file}" \
            | grep 'state *:' || true
        if grep 'state.*: Finished' "${progress_file}"; then
            set -x
            cat "${progress_file}"
            # finished
            return 0
        fi
        # Plain assignment rather than ((remaining -= interval)): the latter
        # returns status 1 when the result is 0.
        remaining=$(( remaining - interval ))
    done
    set -x
    cat "${progress_file}"
    echo "Timeout waiting for job to finish"
    # timeout
    return 1
}
| 51 | |
# Run the command passed as arguments; if it fails, try again with a growing
# pause in between (2, 4, 8, ... capped at 60 seconds) until it succeeds or
# has failed max_retries (10) times in a row.
# Arguments: $@ - the command and its arguments, executed as-is
# Outputs:   whatever the command prints; WARNING/ERROR diagnostics on stderr
# Returns:   0 as soon as one attempt succeeds, 1 once all attempts failed
# Note:      shell tracing is switched off while running and is left switched
#            on when the function returns.
resilient_cmd() {
    set +x
    local max_retries=10
    local sleep_body=2
    local iter=0

    while true; do
        if "$@"; then
            break
        fi

        iter=$(( iter + 1 ))
        # Give up right after the last failed attempt: pausing first would
        # only delay the failure report.
        if [ "${iter}" -ge "${max_retries}" ]; then
            echo "ERROR: Command '$*' failed ${iter} times in row" 1>&2
            set -x
            return 1
        fi

        sleep "${sleep_body}"
        # Exponential backoff
        sleep_body=$(( sleep_body * 2 ))
        if [ "${sleep_body}" -ge 60 ]; then
            sleep_body=60
            # iter counts failed attempts; the retry number is one less
            # because the first attempt is not a retry.
            echo "WARNING: Command '$*' still not successful on retry #$(( iter - 1 )), exp backoff already limited" 1>&2
        fi
    done
    set -x
    return 0
}
| 83 | |
# Show the workspace contents in the build log.
ls -l "${WORKSPACE}"

DEVICE=$(get_lava_device_type artefacts-lava/job.yaml)

# FVP jobs go through TuxSuite when USE_TUXSUITE_FVP is non-zero; this branch
# always terminates the script with the TuxSuite result.
if [[ "${DEVICE}" == "fvp" && "${USE_TUXSUITE_FVP}" -ne 0 ]]; then
    setup_tuxsuite
    set -o pipefail
    # Stay "failed" unless a submission succeeds (also covers LAVA_RETRIES=0,
    # where the loop body never runs).
    status=1
    for (( i = 1; i <= ${LAVA_RETRIES:-3}; i++ )); do
        echo "# TuxSuite submission iteration #$i"
        if python3 -u -m tuxsuite test submit --device fvp-lava --job-definition artefacts-lava/job.yaml | tee tuxsuite-submit.out; then
            status=0
            break
        else
            status=$?
            echo "TuxSuite test failed, status: ${status}"
        fi
    done
    TUXID=$(awk '/^uid:/ {print $2}' tuxsuite-submit.out)
    echo "TuxSuite test ID: ${TUXID}"
    echo "${TUXID}" > "${WORKSPACE}/tux.id"
    # Without a test ID neither logs nor results can be queried.
    if [[ -z "${TUXID}" ]]; then
        echo "TuxSuite test ID could not be obtained" >&2
        exit 1
    fi
    tuxsuite test logs --raw "${TUXID}" > "${WORKSPACE}/lava-raw.log"

    # Capture the results before filtering them. With pipefail enabled,
    # piping straight into `grep -q` lets grep quit at the first match, the
    # upstream stages die from SIGPIPE, and the pipeline is reported as
    # failed - i.e. the "failing testcases found" branch would be skipped
    # exactly when a failure was found.
    results=$(tuxsuite test results "${TUXID}") \
        || echo "WARNING: 'tuxsuite test results ${TUXID}' exited with a non-zero status" >&2

    # grep without -q reads its whole input, so no stage is cut short.
    if grep -v "lava.http-download" <<<"${results}" | grep 'fail' >/dev/null; then
        echo "tuxsuite test submit status was: ${status}, failing testcases found, setting as 1 (failed)"
        status=1
    fi

    echo "TuxSuite test result: ${status}"

    exit "${status}"
fi
| 115 | |
# Download the raw log of a LAVA job into a file. Kept as a function so that
# resilient_cmd re-runs the redirection (truncating the file) on every retry,
# without building a shell command string from the job ID and the path.
# Arguments: $1 - LAVA job ID
#            $2 - destination file
_lava_fetch_raw_log() {
    lavacli jobs logs --raw "$1" > "$2"
}

# Submit artefacts-lava/job.yaml to LAVA, wait for the job and collect its
# raw log and results.
# Globals:   LAVA_USER, LAVA_TOKEN, LAVA_SERVER, WORKSPACE (read)
#            LAVAJOB_ID (written)
# Outputs:   ${WORKSPACE}/lava-raw.log, ${WORKSPACE}/lava.results, and
#            ${WORKSPACE}/lava.log when monitoring is given up
# Returns:   0 when the results contain no "[fail]" entry; 1 when they do or
#            when the results could not be fetched
# Exits:     terminates the whole script with status 1 when no job ID is
#            obtained or when waiting for the job times out
submit_via_lava() {
    # Register the identity with tracing switched off, so that the token is
    # not printed into the build log by xtrace; restore tracing afterwards.
    local xtrace_was_on=0
    case $- in
        *x*) xtrace_was_on=1 ;;
    esac
    set +x
    lavacli identities add --username "${LAVA_USER}" --token "${LAVA_TOKEN}" --uri "https://${LAVA_SERVER}/RPC2" default
    if (( xtrace_was_on )); then
        set -x
    fi

    LAVAJOB_ID=$(resilient_cmd lavacli jobs submit artefacts-lava/job.yaml)

    # check that the submission returned at least a non-empty value
    if [ -z "${LAVAJOB_ID}" ]; then
        echo "LAVA Job ID could not be obtained"
        exit 1
    fi
    echo "LAVA URL: https://${LAVA_SERVER}/scheduler/job/${LAVAJOB_ID} LAVA JOB ID: ${LAVAJOB_ID}"

    # if waiting for LAVA to complete times out, create an 'artificial'
    # lava.log indicating the job ID
    if ! wait_lava_job "${LAVAJOB_ID}"; then
        echo "Stopped monitoring LAVA JOB ${LAVAJOB_ID}, likely stuck or timeout too short?" | tee "${WORKSPACE}/lava.log"
        exit 1
    fi

    # Retrieve the test job plain log, which is a yaml format file from LAVA.
    # Best effort: a missing log does not change the job verdict.
    resilient_cmd _lava_fetch_raw_log "${LAVAJOB_ID}" "${WORKSPACE}/lava-raw.log" \
        || echo "WARNING: could not retrieve the raw log of LAVA job ${LAVAJOB_ID}" >&2

    # Fetch and store the LAVA job result. Redirect instead of piping into
    # tee so that a failed fetch is not hidden behind tee's exit status:
    # results that were never retrieved must not count as a pass.
    if ! resilient_cmd lavacli results "${LAVAJOB_ID}" > "${WORKSPACE}/lava.results"; then
        cat "${WORKSPACE}/lava.results"
        echo "Could not fetch the results of LAVA job ${LAVAJOB_ID}" >&2
        return 1
    fi
    cat "${WORKSPACE}/lava.results"

    # 1 failure, 0 success
    if grep -q '\[fail\]' "${WORKSPACE}/lava.results"; then
        return 1
    fi
    return 0
}
| 146 | |
# FIXME: Juno and FVP jobs may fail for reasons unrelated to the user's
# changes, so CI needs to resubmit the job, at most three times by default:
# - Juno jobs may fail due to LAVA lab infrastructure issues (see
#   https://projects.linaro.org/browse/LSS-2128)
# - FVP jobs may hang at some particular TFTF test (see
#   https://linaro.atlassian.net/browse/TFC-176)

# UPDATE: We want to keep retrying for LAVA for historical reasons,
# but we want to start from a clean slate with TuxSuite, so don't
# retry for it for now, and see how it goes.
| 157 | |
# Submit to LAVA until one attempt succeeds or LAVA_RETRIES (default 3)
# attempts have been used; the script exits 0 on success and 1 otherwise.
status=1
for (( i = 1; i <= ${LAVA_RETRIES:-3}; i++ )); do
    echo "# LAVA submission iteration #$i"
    # A failed attempt simply moves on to the next iteration.
    submit_via_lava || continue
    status=0
    break
done

exit "${status}"