/*
 * Copyright (c) 2015-2023, Arm Limited. All rights reserved.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */
6
7/* -*- C -*-
8 *
9 * Copyright 2015 ARM Limited. All rights reserved.
10 */
11
12#ifndef ARM_INCLUDE_SMMUv3TestEngine_h
13#define ARM_INCLUDE_SMMUv3TestEngine_h
14
#include <inttypes.h>
#include <stdbool.h>
16
17///
18/// Notes on interfacing to PCIe
19/// ----------------------------
20///
21/// MSIAddress and MSIData are held in the MSI Table that is found by a BAR.
22///
/// So if operating under PCIe then MSIAddress should be '1' and MSIData is
/// interpreted as the vector to use (0..2047). If MSIAddress is not '0' or '1'
/// then the frame is misconfigured.
26///
27/// StreamID is not run-time assignable as it is an attribute of the topology of
28/// the system.
29///
30/// In PCIe, then we need multiple instances of the engine and it shall occupy
31/// one Function.
32///
33/// Each BAR is 64 bits so the three BARs are:
34/// * BAR0 is going to point to a set of register frames, at least 128 KiB
35/// * BAR1/2 are MSI-X vector/pending bit array (PBA).
36///
37
38
39///
40/// The engine consists of a series of contiguous pairs of 64 KiB pages, each
41/// page consists of a series of frames. The frames in the first page (User
42/// Page) are expected to be able to be exposed to a low privileged piece of SW,
43/// whilst the second page (Privileged Page) is expected to be controlled by a
44/// higher level of SW.
45///
46/// Examples:
47/// 1) User Page controlled by EL1
48/// Privileged Page controlled by EL2
49/// 2) User Page controlled by EL0
50/// Privileged Page controlled by EL1
51///
52/// The engine can have an unlimited number of pairs.
53///
/// Each pair of pages is full of register frames. The frames are the same
/// size in both and frame N in the User page corresponds to frame N in the
/// Privileged page.
57///
58/// The work load is setup by filling out all the non-cmd fields and then
59/// writing to cmd the command code. If Device-nGnR(n)E is used then no
60/// explicit barrier instruction is required.
61///
62/// When the work has finished then the engine sets cmd to ENGINE_HALTED or
63/// ENGINE_ERROR depending on if the engine encountered an error.
64///
/// If the command was run then an MSI will be generated if msiaddress != 0,
/// independent of whether there was an error or not. If the MSI aborts then
/// uctrl.MSI_ABORTED is set.
68///
/// If the frame/command was invalid for some reason then no MSI will be
/// generated under the assumption that it can't trust the msiaddress field and
/// ENGINE_FRAME_MISCONFIGURED is read out of cmd. Thus the user should write
/// the command and then immediately read to see if it is in the
/// ENGINE_FRAME_MISCONFIGURED state. It is guaranteed that a read of cmd
/// after writing cmd will immediately return ENGINE_FRAME_MISCONFIGURED if the
/// command was invalid.
76///
77/// If the engine is not in the ENGINE_HALTED, ENGINE_ERROR or
78/// ENGINE_FRAME_MISCONFIGURED state then any writes are ignored.
79///
80/// As this is a model-only device then the error diagnostics are crude as it is
81/// expected that a verbose error trace stream will come from the model!
82///
83/// Most of the work-loads can be seeded to do work in a random order with
84/// random transaction sizes. The exact specification of the order and
85/// transaction size are TBD. It is intended that the algorithm used is
86/// specified so that you can work out the order that it should be done in.
87///
88/// The device can issue multiple outstanding transactions for each work-load.
89///
90/// The device will accept any size access for all fields except for cmd.
91///
/// If a single burst access crosses the boundary of a user_frame the result is
/// UNPREDICTABLE. From a programmer's perspective, you can use any way of
/// writing to within the same frame. However, you should only write to cmd
/// separately with a single 32 bit access.
96///
97/// Whilst running the whole frame is write-ignored and the unspecified values
98/// of udata and pdata are UNKNOWN.
99///
100/// The begin, end_incl, stride and seed are interpreted as follows:
101///
102/// * if [begin & ~7ull, end_incl | 7ull] == [0, ~0ull], ENGINE_FRAME_MISCONFIGURED
103/// * such a huge range is not supported for any stride!
104/// * stride == 0, ENGINE_FRAME_MISCONFIGURED
105/// * stride == 1, then the range operated on is [begin, end_incl]
106/// * stride is a multiple of 8
107/// * single 64 bit transfers are performed
108/// * the addresses used are:
109/// (begin & ~7ull) + n * stride for n = 0..N
110/// where the last byte accessed is <= (end_incl | 7)
111/// * for any other value of stride, ENGINE_FRAME_MISCONFIGURED
112/// * if stride > max(8, end_incl - begin + 1) then only a single
113/// element is transferred.
114/// * seed == 0 then the sequence of operation is n = 0, 1, 2, .. N
115/// though multiple in flight transactions could alter this order.
116/// * seed == ~0u then the sequence is n = N, N-1, N-2, .. 0
117/// * seed anything else then sequence randomly pulls one off the front
118/// or the back of the range.
119///
120/// The random number generator R is defined as:
/// Advance the 48-bit generator state in *storage_ by two steps and return a
/// 32-bit pseudo-random value.
///
/// Each step is the linear congruential update
///     X' = (X * 0x5DEECE66D + 0xB) mod 2^48
///
/// Bits [30:0] of the result are bits [47:17] of the state after the first
/// step; bit 31 of the result is bit 31 of the state after the second step.
///
/// storage_ is read and updated in place; it is normally initialised with
/// testengine_random_seed_storage().
///
/// Declared 'static inline' (like the other helpers in this header) and
/// written with C-style casts so the header is usable from both C and C++.
static inline uint32_t testengine_random(uint64_t *storage_)
{
    uint64_t const multiplier = 0x0005deecE66Dull;
    uint64_t const increment  = 0xB;
    uint64_t const mask48     = 0xffffFFFFffffull;

    *storage_ = (*storage_ * multiplier + increment) & mask48;
    uint32_t const t = (uint32_t)((*storage_ >> 17 /* NOTE */) & 0x7FFFffff);

    //
    // Construct the topmost bit by running the generator again and
    // choosing a bit from somewhere
    //
    *storage_ = (*storage_ * multiplier + increment) & mask48;
    return (uint32_t)(t | (*storage_ & 0x80000000ull));
}
138
/// Seeding storage from the 'seed' field is:
///
/// the 32-bit seed_ is placed in bits [47:16] of *storage_ and the low 16 bits
/// are set to the constant 0x330e.
///
/// Declared 'static inline' and written with a C-style cast so the header is
/// usable from both C and C++.
static inline void testengine_random_seed_storage(uint64_t *storage_, uint32_t seed_)
{
    *storage_ = ((uint64_t)seed_ << 16) | 0x330e;
}
144
145
/// 128 bytes
///
/// Register frame found in the User page. The '-- N --' markers below give
/// the index of the 64-bit word at which the following field(s) start.
struct user_frame_t
{
    // -- 0 --
    uint32_t cmd;
    uint32_t uctrl;

    // -- 1 --
    // These keep track of how much work is being done by the engine.
    uint32_t count_of_transactions_launched;
    uint32_t count_of_transactions_returned;

    // -- 2 --
    // If operating under PCIe then msiaddress should be either 1 (send MSI-X)
    // or 0 (don't send). The MSI-X to send is in msidata.
    uint64_t msiaddress;

    // -- 3 --
    // If operating under PCIe then msidata is the MSI-X index in the MSI-X
    // vector table to send (0..2047)
    //
    // If operating under PCIe then msiattr has no effect.
    uint32_t msidata;
    uint32_t msiattr; // encoded the same as the bottom half of the attributes field

    //
    // source and destination attributes, including NS attributes if SSD-s
    // Includes 'instruction' attributes so the work load can look like
    // instruction accesses.
    //
    // Each halfword encodes:
    //     15:14 shareability 0..2 (nsh/ish/osh) (ACE encoding), ignored if a device type
    //     13    outer transient, ignored unless outer ACACHE is cacheable
    //     12    inner transient, ignored unless inner ACACHE is cacheable
    //     10:8  APROT (AMBA encoding)
    //              10 InD -- Instruction not Data
    //               9 NS  -- Non-secure
    //               8 PnU -- Privileged not User
    //     7:4   ACACHE encoding of outer
    //     3:0   if 7:4 == {0,1}
    //               // Device type
    //               3 Gathering if ACACHE is 1, ignored otherwise
    //               2 Reordering if ACACHE is 1, ignored otherwise
    //           else
    //               // Normal type
    //               ACACHE encoding of inner
    //
    // ACACHE encodings:
    //     0000 -- Device-nGnRnE
    //     0001 -- Device-(n)G(n)RE -- depending on bits [3:2]
    //     0010 -- NC-NB (normal non-cacheable non-bufferable)
    //     0011 -- NC
    //     0100 -- illegal
    //     0101 -- illegal
    //     0110 -- raWT
    //     0111 -- raWB
    //     1000 -- illegal
    //     1001 -- illegal
    //     1010 -- waWT
    //     1011 -- waWB
    //     1100 -- illegal
    //     1101 -- illegal
    //     1110 -- rawaWT
    //     1111 -- rawaWB
    //
    // NOTE that the meaning of the ACACHE encodings are dependent on if it is a
    // read or a write. AMBA can't encode directly the 'no-allocate cacheable'
    // and you have to set the 'other' allocation hint. So for example, a read
    // naWB has to be encoded as waWB. A write naWB has to be encoded as raWB,
    // etc.
    //
    // Lowest halfword are 'source' attributes.
    // Highest halfword are 'destination' attributes.
    //
    // NOTE that you can make an non-secure stream output a secure transaction
    // -- the SMMU should sort it out.
    //

    // -- 4 --
    // Under PCIe then a real Function does not have control over the attributes
    // of the transactions that it makes. However, for testing purposes of the
    // SMMU then we allow its attributes to be specified (and magically
    // transport them over PCIe).
    uint32_t attributes;
    uint32_t seed;

    // -- 5 --
    uint64_t begin;
    // -- 6 --
    uint64_t end_incl;

    // -- 7 --
    uint64_t stride;

    // -- 8 --
    uint64_t udata[8];
};

// Lets C code name the type without the 'struct' keyword, as C++ already can.
typedef struct user_frame_t user_frame_t;
243
// 128 bytes
//
// Register frame found in the Privileged page. The '-- N --' markers give the
// index of the 64-bit word at which the following field(s) start.
struct privileged_frame_t
{
    // -- 0 --
    uint32_t pctrl;
    uint32_t downstream_port_index; // [0,64), under PCIe only use port 0

    // -- 1 --
    // Under PCIe, then streamid is ignored.
    uint32_t streamid;
    // ~0u means no substreamid, otherwise must be a 20 bit number or
    // ENGINE_FRAME_MISCONFIGURED
    uint32_t substreamid;

    // -- 2 --
    uint64_t pdata[14];
};

// Lets C code name the type without the 'struct' keyword, as C++ already can.
typedef struct privileged_frame_t privileged_frame_t;
259
260// 128 KiB
261struct engine_pair_t
262{
263 user_frame_t user[ 64 * 1024 / sizeof(user_frame_t)];
264 privileged_frame_t privileged[ 64 * 1024 / sizeof(privileged_frame_t)];
265};
266
267//
268// NOTE that we don't have a command that does some writes then some reads. For
269// the ACK this is probably not going to be much of a problem.
270//
271// On completion, an MSI will be sent if the msiaddress != 0.
272//
enum cmd_t
{
    // ORDER IS IMPORTANT, see predicates later in this file.

    // The frame was misconfigured.
    ENGINE_FRAME_MISCONFIGURED = ~0u - 1,

    // The engine encountered an error (downstream transaction aborted).
    ENGINE_ERROR = ~0u,

    // This frame is unimplemented or in use by the secure world.
    //
    // A user _can_ write this to cmd and it will be considered to be
    // ENGINE_HALTED.
    ENGINE_NO_FRAME = 0,

    // The engine is halted.
    ENGINE_HALTED = 1,

    // The engine memcpy's from region [begin, end_incl] to address udata[0].
    //
    // If stride is 0 then ENGINE_ERROR is produced, udata[2] contains the error
    // address. No MSI is generated.
    // NOTE(review): the stride rules in the engine description earlier in this
    // file say stride == 0 gives ENGINE_FRAME_MISCONFIGURED -- confirm which
    // one the model implements.
    //
    // If stride is 1 then this is a normal memcpy(). If stride is larger then
    // not all the data will be copied.
    //
    // The order and size of the transactions used are determined randomly using
    // seed. If seed is:
    //     0          -- do them from lowest address to highest address
    //     ~0u        -- do them in reverse order
    //     otherwise  -- use the value as a seed to do them in random order
    // The ability to do them in a non-random order means that we stand a
    // chance of getting merged event records.
    //
    // This models a work-load where we start with some reads and then do some
    // writes.
    ENGINE_MEMCPY = 2,

    // The engine randomizes region [begin, end_incl] using rand48, seeded
    // with seed and using the specified stride.
    //
    // The order and size of the transactions used are determined randomly using
    // seed.
    //
    // The seed is used to create a random number generator that is used to
    // choose the direction.
    //
    // A separate random number generator per transaction is then used based on
    // seed and the address:
    //
    //     seed_per_transaction = seed ^ (address >> 32) ^ (address & 0xFFFFffff);
    //
    // This seed is then used to seed a random number generator to fill the
    // required space. The data used should be:
    //     uint64_t storage;
    //     for (uint8_t* p = (uintptr_t)begin; p != (uintptr_t)end_incl; ++ p)
    //     {
    //         // When we cross a 4 KiB we reseed.
    //         if ((p & 0xFFF) == 0 || p == begin)
    //         {
    //             testengine_random_seed_storage(
    //                 &storage,
    //                 seed ^ ((uintptr_t)p >> 32) ^ (uint32_t((uintptr_t)p))
    //                 );
    //         }
    //         assert( *p == (uint8_t)testengine_random(&storage) );
    //         ++ p;
    //     }
    // NOTE(review): as written the sketch increments p twice per iteration and
    // stops before end_incl even though the range is inclusive -- confirm the
    // intended byte coverage against the model.
    //
    // This isn't the most efficient way of doing it as it throws away a lot of
    // entropy from the call to testengine_random() but then we aren't aiming for
    // good random numbers.
    //
    // If stride is 0 then ENGINE_ERROR is produced, udata[2] contains the error
    // address. (NOTE that udata[1] is not used).
    //
    // If stride is 1 then this fills the entire buffer. If stride is larger
    // then not all the data will be randomized.
    //
    // This models a write-only work-load.
    ENGINE_RAND48 = 3,

    // The engine reads [begin, end_incl], treats the region as a set of
    // uint64_t and sums them, delivering the result to udata[1], using the
    // specified stride.
    //
    // If stride is 0 then ENGINE_ERROR is produced, udata[2] is the error
    // address.
    //
    // If stride is 1 then this sums the entire buffer. If stride is larger
    // then not all the data will be summed.
    //
    // The order and size of the transactions used are determined randomly using
    // seed.
    //
    // The begin must be 64 bit aligned (begin & 7) == 0 and the end_incl must
    // end at the end of a 64 bit quantity (end_incl & 7) == 7, otherwise
    // ENGINE_FRAME_MISCONFIGURED is generated.
    //
    // This models a read-only work-load.
    ENGINE_SUM64 = 4
};

// Lets C code name the type without the 'enum' keyword, as C++ already can.
typedef enum cmd_t cmd_t;
374
375static inline bool is_valid_and_running(cmd_t t_)
376{
377 unsigned const t = t_; // compensate for bad MSVC treating t_ as signed!
378 return ENGINE_MEMCPY <= t && t <= ENGINE_SUM64;
379}
380
381static inline bool is_in_error_state(cmd_t t_)
382{
383 return t_ == ENGINE_ERROR || t_ == ENGINE_FRAME_MISCONFIGURED;
384}
385
386static inline bool is_in_error_or_stopped_state(cmd_t t_)
387{
388 return t_ == ENGINE_NO_FRAME
389 || t_ == ENGINE_HALTED
390 || is_in_error_state(t_);
391}
392
393static inline bool is_invalid(cmd_t t_)
394{
395 unsigned const t = t_; // compensate for bad MSVC treating t_ as signed!
396 return ENGINE_SUM64 < t && t < ENGINE_FRAME_MISCONFIGURED;
397}
398
399/// pctrl has layout
400///
401/// 0 -- SSD_NS -- the stream and frame is non-secure
402/// -- note that if this is zero then it means the
403/// frame is controlled by secure SW and non-secure
404/// accesses are RAZ/WI (and so see ENGINE_NO_FRAME)
405/// Secure SW can only generate secure SSD StreamIDs
406/// This could be relaxed in the future if people need
407/// to.
408///
409/// 8 -- ATS_ENABLE -- CURRENTLY HAS NO EFFECT
410/// 9 -- PRI_ENABLE -- CURRENTLY HAS NO EFFECT
411///
412/// SSD_NS can only be altered by a secure access. Once clear then the
413/// corresponding user and privileged frames are accessible only to secure
414/// accesses. Non-secure accesses are RAZ/WI (and hence cmd will be
415/// ENGINE_NO_FRAME to non-secure accesses).
416///
417/// ATS_ENABLE/PRI_ENABLE are not currently implemented and their intent is for
418/// per-substreamid ATS/PRI support.
419///
420/// However, ATS/PRI support for the whole StreamID is advertised through the
421/// PCIe Extended Capabilities Header.
422///
423
424/// uctrl has layout
425///
426/// 0 -- MSI_ABORTED -- an MSI aborted (set by the engine)
427///
428/// 16-31 -- RATE -- some ill-defined metric for how fast to do the work!
429///
430
431#endif