Olivier Deprez | 40134f8 | 2022-12-21 16:56:27 +0100 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2015-2023, Arm Limited. All rights reserved. |
| 3 | * |
| 4 | * SPDX-License-Identifier: BSD-3-Clause |
| 5 | */ |
| 6 | |
| 7 | /* -*- C -*- |
| 8 | * |
| 9 | * Copyright 2015 ARM Limited. All rights reserved. |
| 10 | */ |
| 11 | |
| 12 | #ifndef ARM_INCLUDE_SMMUv3TestEngine_h |
| 13 | #define ARM_INCLUDE_SMMUv3TestEngine_h |
| 14 | |
| 15 | #include <inttypes.h> |
| 16 | |
| 17 | /// |
| 18 | /// Notes on interfacing to PCIe |
| 19 | /// ---------------------------- |
| 20 | /// |
| 21 | /// MSIAddress and MSIData are held in the MSI Table that is found by a BAR. |
| 22 | /// |
| 23 | /// So if operating under PCIe then MSIAddress should be '1' and MSIData is |
| 24 | /// interpreted as the vector to use (0..2047). If MSIAddress is not '0' or '1' |
| 25 | /// then the frame is misconfigured. |
| 26 | /// |
| 27 | /// StreamID is not run-time assignable as it is an attribute of the topology of |
| 28 | /// the system. |
| 29 | /// |
| 30 | /// In PCIe, then we need multiple instances of the engine and it shall occupy |
| 31 | /// one Function. |
| 32 | /// |
| 33 | /// Each BAR is 64 bits so the three BARs are: |
| 34 | /// * BAR0 is going to point to a set of register frames, at least 128 KiB |
| 35 | /// * BAR1/2 are MSI-X vector/pending bit array (PBA). |
| 36 | /// |
| 37 | |
| 38 | |
| 39 | /// |
| 40 | /// The engine consists of a series of contiguous pairs of 64 KiB pages, each |
| 41 | /// page consists of a series of frames. The frames in the first page (User |
| 42 | /// Page) are expected to be able to be exposed to a low privileged piece of SW, |
| 43 | /// whilst the second page (Privileged Page) is expected to be controlled by a |
| 44 | /// higher level of SW. |
| 45 | /// |
| 46 | /// Examples: |
| 47 | /// 1) User Page controlled by EL1 |
| 48 | /// Privileged Page controlled by EL2 |
| 49 | /// 2) User Page controlled by EL0 |
| 50 | /// Privileged Page controlled by EL1 |
| 51 | /// |
| 52 | /// The engine can have an unlimited number of pairs. |
| 53 | /// |
| 54 | /// Each pair of pages are full of register frames. The frames are the same |
| 55 | /// size in both and frame N in the User page corresponds to frame N in the |
| 56 | /// Privileged page. |
| 57 | /// |
| 58 | /// The work load is set up by filling out all the non-cmd fields and then |
| 59 | /// writing to cmd the command code. If Device-nGnR(n)E is used then no |
| 60 | /// explicit barrier instruction is required. |
| 61 | /// |
| 62 | /// When the work has finished then the engine sets cmd to ENGINE_HALTED or |
| 63 | /// ENGINE_ERROR depending on if the engine encountered an error. |
| 64 | /// |
| 65 | /// If the command was run then an MSI will be generated if msiaddress != 0, |
| 66 | /// independent of if there was an error or not. If the MSI aborts then |
| 67 | /// uctrl.MSI_ABORTED is set. |
| 68 | /// |
| 69 | /// If the frame/command was invalid for some reason then no MSI will be |
| 70 | /// generated under the assumption that it can't trust the msiaddress field and |
| 71 | /// ENGINE_FRAME_MISCONFIGURED is read out of cmd. Thus the user should write |
| 72 | /// the command and then immediately read to see if it is in the |
| 73 | /// ENGINE_FRAME_MISCONFIGURED state. It is guaranteed that a read of cmd |
| 74 | /// after writing cmd will immediately return ENGINE_FRAME_MISCONFIGURED if the |
| 75 | /// command was invalid. |
| 76 | /// |
| 77 | /// If the engine is not in the ENGINE_HALTED, ENGINE_ERROR or |
| 78 | /// ENGINE_FRAME_MISCONFIGURED state then any writes are ignored. |
| 79 | /// |
| 80 | /// As this is a model-only device then the error diagnostics are crude as it is |
| 81 | /// expected that a verbose error trace stream will come from the model! |
| 82 | /// |
| 83 | /// Most of the work-loads can be seeded to do work in a random order with |
| 84 | /// random transaction sizes. The exact specification of the order and |
| 85 | /// transaction size are TBD. It is intended that the algorithm used is |
| 86 | /// specified so that you can work out the order that it should be done in. |
| 87 | /// |
| 88 | /// The device can issue multiple outstanding transactions for each work-load. |
| 89 | /// |
| 90 | /// The device will accept any size access for all fields except for cmd. |
| 91 | /// |
| 92 | /// If a single burst access crosses the boundary of a user_frame the result is |
| 93 | /// UNPREDICTABLE. From a programmer's perspective, then you can use any way of |
| 94 | /// writing to within the same frame. However, you should only write to cmd |
| 95 | /// separately with a single 32 bit access. |
| 96 | /// |
| 97 | /// Whilst running the whole frame is write-ignored and the unspecified values |
| 98 | /// of udata and pdata are UNKNOWN. |
| 99 | /// |
| 100 | /// The begin, end_incl, stride and seed are interpreted as follows: |
| 101 | /// |
| 102 | /// * if [begin & ~7ull, end_incl | 7ull] == [0, ~0ull], ENGINE_FRAME_MISCONFIGURED |
| 103 | /// * such a huge range is not supported for any stride! |
| 104 | /// * stride == 0, ENGINE_FRAME_MISCONFIGURED |
| 105 | /// * stride == 1, then the range operated on is [begin, end_incl] |
| 106 | /// * stride is a multiple of 8 |
| 107 | /// * single 64 bit transfers are performed |
| 108 | /// * the addresses used are: |
| 109 | /// (begin & ~7ull) + n * stride for n = 0..N |
| 110 | /// where the last byte accessed is <= (end_incl | 7) |
| 111 | /// * for any other value of stride, ENGINE_FRAME_MISCONFIGURED |
| 112 | /// * if stride > max(8, end_incl - begin + 1) then only a single |
| 113 | /// element is transferred. |
| 114 | /// * seed == 0 then the sequence of operation is n = 0, 1, 2, .. N |
| 115 | /// though multiple in flight transactions could alter this order. |
| 116 | /// * seed == ~0u then the sequence is n = N, N-1, N-2, .. 0 |
| 117 | /// * seed anything else then sequence randomly pulls one off the front |
| 118 | /// or the back of the range. |
| 119 | /// |
| 120 | /// The random number generator R is defined as: |
/// Advance the engine's pseudo-random generator and return 32 bits.
///
/// The state in *storage_ is stepped twice with a 48-bit linear congruential
/// recurrence (multiplier 0x5DEECE66D, increment 0xB, result masked to 48
/// bits):
///   * bits [47:17] of the first new state become bits [30:0] of the result;
///   * bit 31 of the second new state becomes bit 31 of the result.
///
/// storage_ must point to valid generator state (see
/// testengine_random_seed_storage()); it is updated in place. Any bits above
/// bit 47 in the incoming state are discarded by the mask.
///
/// Declared static inline, like the other helpers in this header, and written
/// with C-style casts so the definition is usable from both C and C++.
static inline uint32_t testengine_random(uint64_t *storage_)
{
    uint64_t const multiplier = 0x0005deecE66Dull;
    uint64_t const increment  = 0xB;
    uint64_t const mask48     = 0xffffFFFFffffull;

    // First step: supplies the low 31 bits. NOTE the shift is 17, which keeps
    // exactly bits [47:17] of the 48-bit state.
    uint64_t state = (*storage_ * multiplier + increment) & mask48;
    uint32_t const low31 = (uint32_t)((state >> 17) & 0x7FFFffff);

    //
    // Second step: construct the topmost bit by running the generator again
    // and taking bit 31 of the new state.
    //
    state = (state * multiplier + increment) & mask48;
    *storage_ = state;

    return (uint32_t)(low31 | (state & 0x80000000ull));
}
| 138 | |
/// Initialise generator state from a frame's 32-bit 'seed' field.
///
/// The seed is placed in bits [47:16] of *storage_ and the low 16 bits are set
/// to the constant 0x330e. Any previous contents of *storage_ are overwritten.
///
/// Declared static inline, like the other helpers in this header, and written
/// with a C-style cast so the definition is usable from both C and C++.
static inline void testengine_random_seed_storage(uint64_t *storage_, uint32_t seed_)
{
    *storage_ = ((uint64_t)seed_ << 16) | 0x330e;
}
| 144 | |
| 145 | |
/// One user-page register frame; the fields below add up to 128 bytes.
///
/// The "-- N --" markers give the position of each group in 64-bit words,
/// counted from the start of the frame.
struct user_frame_t
{
    // -- 0 --
    // Command/status word (values from cmd_t) and the user control word.
    uint32_t cmd;
    uint32_t uctrl;

    // -- 1 --
    // Progress counters: how much work the engine has issued and got back.
    uint32_t count_of_transactions_launched;
    uint32_t count_of_transactions_returned;

    // -- 2 --
    // Under PCIe this should be 1 (send an MSI-X) or 0 (do not send); which
    // MSI-X is sent is selected by msidata.
    uint64_t msiaddress;

    // -- 3 --
    // Under PCIe, msidata is the index (0..2047) of the entry in the MSI-X
    // vector table to send, and msiattr has no effect.
    uint32_t msidata;
    uint32_t msiattr; // encoded like the bottom half of the attributes field

    //
    // 'attributes' carries the source and destination attributes, including
    // the NS attribute if SSD-s. 'Instruction' attributes are included too, so
    // a work load can be made to look like instruction accesses.
    //
    //   bits [15:0]  -- 'source' attributes
    //   bits [31:16] -- 'destination' attributes
    //
    // Layout of each halfword:
    //     15:14 shareability 0..2 (nsh/ish/osh) (ACE encoding); ignored for a
    //           device type
    //     13    outer transient; ignored unless outer ACACHE is cacheable
    //     12    inner transient; ignored unless inner ACACHE is cacheable
    //     10:8  APROT (AMBA encoding)
    //              10  InD -- Instruction not Data
    //               9  NS  -- Non-secure
    //               8  PnU -- Privileged not User
    //     7:4   ACACHE encoding of outer
    //     3:0   if 7:4 == {0,1}
    //               // Device type
    //               3  Gathering  if ACACHE is 1, ignored otherwise
    //               2  Reordering if ACACHE is 1, ignored otherwise
    //           else
    //               // Normal type
    //               ACACHE encoding of inner
    //
    // ACACHE encodings:
    //     0000 -- Device-nGnRnE
    //     0001 -- Device-(n)G(n)RE -- depending on bits [3:2]
    //     0010 -- NC-NB (normal non-cacheable non-bufferable)
    //     0011 -- NC
    //     0100 -- illegal
    //     0101 -- illegal
    //     0110 -- raWT
    //     0111 -- raWB
    //     1000 -- illegal
    //     1001 -- illegal
    //     1010 -- waWT
    //     1011 -- waWB
    //     1100 -- illegal
    //     1101 -- illegal
    //     1110 -- rawaWT
    //     1111 -- rawaWB
    //
    // NOTE: what an ACACHE encoding means depends on whether the access is a
    // read or a write. AMBA cannot directly encode 'no-allocate cacheable';
    // the 'other' allocation hint has to be set instead. For example a read
    // naWB is encoded as waWB and a write naWB is encoded as raWB, etc.
    //
    // NOTE: a non-secure stream can be made to output a secure transaction --
    // the SMMU should sort it out.
    //

    // -- 4 --
    // A real PCIe Function has no control over the attributes of the
    // transactions it makes. For the purpose of testing the SMMU the
    // attributes can nevertheless be specified here (and are magically
    // transported over PCIe).
    uint32_t attributes;
    uint32_t seed;

    // -- 5 --
    uint64_t begin;

    // -- 6 --
    uint64_t end_incl;

    // -- 7 --
    uint64_t stride;

    // -- 8 --
    uint64_t udata[8];
};
| 243 | |
// One privileged-page register frame; the fields below add up to 128 bytes.
//
// The "-- N --" markers give the position of each group in 64-bit words,
// counted from the start of the frame.
struct privileged_frame_t
{
    // -- 0 --
    uint32_t pctrl;
    // Valid values are [0,64); under PCIe only use port 0.
    uint32_t downstream_port_index;

    // -- 1 --
    // streamid is ignored under PCIe.
    uint32_t streamid;
    // ~0u means "no substreamid"; any other value must be a 20 bit number,
    // otherwise the result is ENGINE_FRAME_MISCONFIGURED.
    uint32_t substreamid;

    // -- 2 --
    uint64_t pdata[14];
};
| 259 | |
| 260 | // 128 KiB |
| 261 | struct engine_pair_t |
| 262 | { |
| 263 | user_frame_t user[ 64 * 1024 / sizeof(user_frame_t)]; |
| 264 | privileged_frame_t privileged[ 64 * 1024 / sizeof(privileged_frame_t)]; |
| 265 | }; |
| 266 | |
//
// Command and status codes read from / written to user_frame_t::cmd.
//
// NOTE that there is no command that does some writes and then some reads.
// For the ACK this is probably not going to be much of a problem.
//
// On completion, an MSI is sent if msiaddress != 0.
//
enum cmd_t
{
    // The NUMERIC ORDER of these values matters: the predicates later in this
    // file classify a code using range comparisons.

    // Status: the frame was misconfigured.
    ENGINE_FRAME_MISCONFIGURED = ~0u - 1,

    // Status: the engine hit an error (a downstream transaction aborted).
    ENGINE_ERROR = ~0u,

    // Status: this frame is unimplemented or in use by the secure world.
    //
    // A user _can_ write this value to cmd, in which case it is treated as
    // ENGINE_HALTED.
    ENGINE_NO_FRAME = 0,

    // Status: the engine is halted.
    ENGINE_HALTED = 1,

    // Command: memcpy from region [begin, end_incl] to the address in
    // udata[0].
    //
    // stride == 0 produces ENGINE_ERROR with the error address in udata[2],
    // and no MSI is generated.
    // NOTE(review): the frame description earlier in this file lists
    // stride == 0 as ENGINE_FRAME_MISCONFIGURED -- confirm which applies.
    //
    // stride == 1 is a normal memcpy(). With a larger stride not all of the
    // data is copied.
    //
    // The order and size of the transactions are chosen using seed:
    //      0   -- lowest address to highest address
    //      ~0u -- reverse order
    //      anything else -- used as a seed to do them in random order
    // Being able to run in a non-random order gives a chance of getting
    // merged event records.
    //
    // Models a work-load that starts with some reads and then does some
    // writes.
    ENGINE_MEMCPY = 2,

    // Command: randomize region [begin, end_incl] using rand48, seeded with
    // seed and using the specified stride.
    //
    // The order and size of the transactions are determined randomly using
    // seed: seed creates a random number generator that chooses the
    // direction.
    //
    // A separate random number generator per transaction is then derived from
    // seed and the address:
    //
    //     seed_per_transaction = seed ^ (address >> 32) ^ (address & 0xFFFFffff);
    //
    // and that generator fills the required space. The data used should be:
    //      uint64_t storage;
    //      for (uint8_t* p = (uintptr_t)begin; p != (uintptr_t)end_incl; ++ p)
    //      {
    //          // When we cross a 4 KiB we reseed.
    //          if ((p & 0xFFF) == 0 || p == begin)
    //          {
    //               testengine_random_seed_storage(
    //                   V ^ ((uintptr_t)p >> 32) ^ (uint32_t((uintptr_t)p))
    //                   );
    //          }
    //          assert( *p == (uint8_t)testengine_random(&storage) );
    //          ++ p;
    //      }
    // NOTE(review): the sketch above is reproduced as originally written. It
    // does not pass &storage when reseeding, never defines V (presumably the
    // seed field), increments p twice per iteration and stops before
    // end_incl -- confirm the intended sequence against the model.
    //
    // This is not the most efficient scheme, as it throws away a lot of the
    // entropy of each testengine_random() call, but good random numbers are
    // not the aim.
    //
    // stride == 0 produces ENGINE_ERROR with the error address in data[2]
    // (presumably udata[2]); data[1] is not used.
    // NOTE(review): same stride == 0 discrepancy as for ENGINE_MEMCPY.
    //
    // stride == 1 fills the entire buffer. With a larger stride not all of
    // the data is randomized.
    //
    // Models a write-only work-load.
    ENGINE_RAND48 = 3,

    // Command: read [begin, end_incl] as a set of uint64_t, using the
    // specified stride, and deliver their sum to udata[1].
    //
    // stride == 0 produces ENGINE_ERROR with the error address in udata[2].
    // NOTE(review): same stride == 0 discrepancy as for ENGINE_MEMCPY.
    //
    // stride == 1 sums the entire buffer. With a larger stride not all of
    // the data is summed.
    //
    // The order and size of the transactions are determined randomly using
    // seed.
    //
    // begin must be 64 bit aligned, (begin & 7) == 0, and end_incl must fall
    // on the last byte of a 64 bit quantity, (end_incl & 7) == 7; otherwise
    // ENGINE_FRAME_MISCONFIGURED is generated.
    //
    // Models a read-only work-load.
    ENGINE_SUM64 = 4
};
| 374 | |
| 375 | static inline bool is_valid_and_running(cmd_t t_) |
| 376 | { |
| 377 | unsigned const t = t_; // compensate for bad MSVC treating t_ as signed! |
| 378 | return ENGINE_MEMCPY <= t && t <= ENGINE_SUM64; |
| 379 | } |
| 380 | |
| 381 | static inline bool is_in_error_state(cmd_t t_) |
| 382 | { |
| 383 | return t_ == ENGINE_ERROR || t_ == ENGINE_FRAME_MISCONFIGURED; |
| 384 | } |
| 385 | |
| 386 | static inline bool is_in_error_or_stopped_state(cmd_t t_) |
| 387 | { |
| 388 | return t_ == ENGINE_NO_FRAME |
| 389 | || t_ == ENGINE_HALTED |
| 390 | || is_in_error_state(t_); |
| 391 | } |
| 392 | |
| 393 | static inline bool is_invalid(cmd_t t_) |
| 394 | { |
| 395 | unsigned const t = t_; // compensate for bad MSVC treating t_ as signed! |
| 396 | return ENGINE_SUM64 < t && t < ENGINE_FRAME_MISCONFIGURED; |
| 397 | } |
| 398 | |
| 399 | /// pctrl has layout |
| 400 | /// |
| 401 | /// 0 -- SSD_NS -- the stream and frame is non-secure |
| 402 | /// -- note that if this is zero then it means the |
| 403 | /// frame is controlled by secure SW and non-secure |
| 404 | /// accesses are RAZ/WI (and so see ENGINE_NO_FRAME) |
| 405 | /// Secure SW can only generate secure SSD StreamIDs |
| 406 | /// This could be relaxed in the future if people need |
| 407 | /// to. |
| 408 | /// |
| 409 | /// 8 -- ATS_ENABLE -- CURRENTLY HAS NO EFFECT |
| 410 | /// 9 -- PRI_ENABLE -- CURRENTLY HAS NO EFFECT |
| 411 | /// |
| 412 | /// SSD_NS can only be altered by a secure access. Once clear then the |
| 413 | /// corresponding user and privileged frames are accessible only to secure |
| 414 | /// accesses. Non-secure accesses are RAZ/WI (and hence cmd will be |
| 415 | /// ENGINE_NO_FRAME to non-secure accesses). |
| 416 | /// |
| 417 | /// ATS_ENABLE/PRI_ENABLE are not currently implemented and their intent is for |
| 418 | /// per-substreamid ATS/PRI support. |
| 419 | /// |
| 420 | /// However, ATS/PRI support for the whole StreamID is advertised through the |
| 421 | /// PCIe Extended Capabilities Header. |
| 422 | /// |
| 423 | |
| 424 | /// uctrl has layout |
| 425 | /// |
| 426 | /// 0 -- MSI_ABORTED -- an MSI aborted (set by the engine) |
| 427 | /// |
| 428 | /// 16-31 -- RATE -- some ill-defined metric for how fast to do the work! |
| 429 | /// |
| 430 | |
| 431 | #endif |