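/*
 * Hafnium hypervisor driver. Creates one kernel thread per vcpu of each
 * secondary VM and exposes a small sysfs interface for interrupting a vcpu
 * and for sending messages to a VM.
 */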
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched/task.h>
#include <linux/slab.h>

#include <hf/call.h>

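/*
 * Compile-time caps on how many VMs and vcpus the driver will manage;
 * hf_init() validates the hypervisor-reported counts against these.
 */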
#define CONFIG_HAFNIUM_MAX_VMS 16
#define CONFIG_HAFNIUM_MAX_VCPUS 32

struct hf_vcpu {
	spinlock_t lock;
	struct hf_vm *vm;
	uint32_t vcpu_index;
	struct task_struct *task;
	struct hrtimer timer;
	bool pending_irq;
};

struct hf_vm {
	uint32_t id;
	uint32_t vcpu_count;
	struct hf_vcpu *vcpu;
};

static struct hf_vm *hf_vms;
static uint32_t hf_vm_count;
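/*
 * One page each for the send and receive mailboxes shared with the
 * hypervisor; configured in hf_init() and never freed (see there).
 */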
static struct page *hf_send_page = NULL;
static struct page *hf_recv_page = NULL;

/**
 * Wakes up the thread associated with the vcpu that owns the given timer. This
 * is called when the timer the thread is waiting on expires.
 */
static enum hrtimer_restart hf_vcpu_timer_expired(struct hrtimer *timer)
{
	struct hf_vcpu *vcpu = container_of(timer, struct hf_vcpu, timer);
	wake_up_process(vcpu->task);
	return HRTIMER_NORESTART;
}

/**
 * This is the main loop of each vcpu.
 */
static int hf_vcpu_thread(void *data)
{
	struct hf_vcpu *vcpu = data;
	struct hf_vcpu_run_return ret;

	hrtimer_init(&vcpu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->timer.function = &hf_vcpu_timer_expired;

	while (!kthread_should_stop()) {
		unsigned long flags;
		size_t irqs;

		set_current_state(TASK_RUNNING);

		/* Determine if we must interrupt the vcpu. */
		spin_lock_irqsave(&vcpu->lock, flags);
		irqs = vcpu->pending_irq ? 1 : 0;
		vcpu->pending_irq = false;
		spin_unlock_irqrestore(&vcpu->lock, flags);

		/* Call into Hafnium to run vcpu. */
		ret = hf_vcpu_run(vcpu->vm->id, vcpu->vcpu_index);

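		/*
		 * ret.code says why the vcpu stopped running; each case below
		 * maps that reason onto a scheduling action for this thread.
		 */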
		switch (ret.code) {
		/* Yield (forcibly or voluntarily). */
		case HF_VCPU_RUN_YIELD:
			break;

		/* WFI. */
		case HF_VCPU_RUN_WAIT_FOR_INTERRUPT:
			set_current_state(TASK_INTERRUPTIBLE);
			if (kthread_should_stop())
				break;
			schedule();
			break;

		/* Wake up another vcpu. */
		case HF_VCPU_RUN_WAKE_UP:
		{
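			/*
			 * wake_up.vm_id is 1-based here: hf_vms tracks only
			 * the secondary VMs (see hf_init()), hence the -1
			 * when indexing below.
			 */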
			struct hf_vm *vm;
			if (ret.wake_up.vm_id < 1 ||
			    ret.wake_up.vm_id > hf_vm_count)
				break;
			vm = &hf_vms[ret.wake_up.vm_id - 1];
			if (ret.wake_up.vcpu < vm->vcpu_count) {
				wake_up_process(vm->vcpu[ret.wake_up.vcpu].task);
			} else if (ret.wake_up.vcpu == HF_INVALID_VCPU) {
				/* TODO: pick one to interrupt. */
				pr_warning("No vcpu to wake.\n");
			}
		}
			break;

		/* Response available. */
		case HF_VCPU_RUN_MESSAGE:
		{
			uint32_t i;
			const char *buf = page_address(hf_recv_page);
			pr_info("Received response from vm %u (%u bytes): ",
				vcpu->vm->id, ret.message.size);
			for (i = 0; i < ret.message.size; i++)
				printk(KERN_CONT "%c", buf[i]);
			printk(KERN_CONT "\n");
			hf_mailbox_clear();
		}
			break;

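		/*
		 * The vcpu asked to sleep: block until either the hrtimer set
		 * up below fires after ret.sleep.ns nanoseconds or something
		 * else wakes the thread first.
		 */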
		case HF_VCPU_RUN_SLEEP:
			set_current_state(TASK_INTERRUPTIBLE);
			if (kthread_should_stop())
				break;
			hrtimer_start(&vcpu->timer, ns_to_ktime(ret.sleep.ns),
				      HRTIMER_MODE_REL);
			schedule();
			hrtimer_cancel(&vcpu->timer);
			break;
		}
	}

	set_current_state(TASK_RUNNING);

	return 0;
}

/**
 * Frees all resources, including threads, associated with the Hafnium driver.
 */
static void hf_free_resources(void)
{
	uint32_t i, j;

	/*
	 * First stop all worker threads. We need to do this before freeing
	 * resources because workers may reference each other, so it is only
	 * safe to free resources after they have all stopped.
	 */
	for (i = 0; i < hf_vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];
		for (j = 0; j < vm->vcpu_count; j++)
			kthread_stop(vm->vcpu[j].task);
	}

	/*
	 * Now free resources: drop the reference taken by get_task_struct()
	 * when each thread was created in hf_init(), then free the vcpu
	 * arrays and the VM array itself.
	 */
	for (i = 0; i < hf_vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];
		for (j = 0; j < vm->vcpu_count; j++)
			put_task_struct(vm->vcpu[j].task);
		kfree(vm->vcpu);
	}

	kfree(hf_vms);
}

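/*
 * sysfs write handler: injects an interrupt into the first vcpu of the first
 * secondary VM (the written value is ignored for now; see the TODOs below).
 * A minimal usage sketch, assuming the module is loaded and sysfs is mounted
 * at /sys:
 *
 *   echo 1 > /sys/kernel/hafnium/interrupt
 */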
static ssize_t hf_interrupt_store(struct kobject *kobj,
				  struct kobj_attribute *attr, const char *buf,
				  size_t count)
{
	struct hf_vcpu *vcpu;
	unsigned long flags;
	struct task_struct *task;

	/* TODO: Parse input to determine which vcpu to interrupt. */
	/* TODO: Check bounds. */

	vcpu = &hf_vms[0].vcpu[0];

	spin_lock_irqsave(&vcpu->lock, flags);
	vcpu->pending_irq = true;
	/* TODO: Do we need to increment the task's ref count here? */
	task = vcpu->task;
	spin_unlock_irqrestore(&vcpu->lock, flags);

	/* Wake up the task. If it's already running, kick it out. */
	/* TODO: There's a race here: the kick may happen right before we go
	 * to the hypervisor. */
	if (wake_up_process(task) == 0)
		kick_process(task);

	return count;
}

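/*
 * sysfs write handler: copies up to HF_MAILBOX_SIZE bytes into the send page
 * and asks Hafnium to deliver them to the first secondary VM. A minimal usage
 * sketch (hypothetical payload):
 *
 *   echo -n "hello" > /sys/kernel/hafnium/send
 */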
static ssize_t hf_send_store(struct kobject *kobj, struct kobj_attribute *attr,
			     const char *buf, size_t count)
{
	int64_t ret;
	struct hf_vm *vm;

	count = min_t(size_t, count, HF_MAILBOX_SIZE);

	/* Copy data to send buffer. */
	memcpy(page_address(hf_send_page), buf, count);

	vm = &hf_vms[0];
	ret = hf_mailbox_send(vm->id, count);
	if (ret < 0)
		return -EAGAIN;

	if (ret == HF_INVALID_VCPU) {
		/*
		 * TODO: We need to interrupt some vcpu because none are
		 * waiting for data.
		 */
		pr_warning("No vcpu to receive message.\n");
		return -ENOSYS;
	}

	if (ret >= vm->vcpu_count)
		return -EINVAL;

	/* Wake up the vcpu that is going to process the data. */
	/* TODO: There's a race where the thread may get woken up before it
	 * goes to sleep. Fix this. */
	wake_up_process(vm->vcpu[ret].task);

	return count;
}

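/* Write-only (0200) attributes exposed under /sys/kernel/hafnium/. */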
static struct kobject *hf_sysfs_obj = NULL;
static struct kobj_attribute interrupt_attr =
	__ATTR(interrupt, 0200, NULL, hf_interrupt_store);
static struct kobj_attribute send_attr =
	__ATTR(send, 0200, NULL, hf_send_store);

/**
 * Initializes the Hafnium driver's sysfs interface.
 */
static void __init hf_init_sysfs(void)
{
	int ret;

	/* Create the sysfs interface to interrupt vcpus. */
	hf_sysfs_obj = kobject_create_and_add("hafnium", kernel_kobj);
	if (!hf_sysfs_obj) {
		pr_err("Unable to create sysfs object\n");
	} else {
		ret = sysfs_create_file(hf_sysfs_obj, &interrupt_attr.attr);
		if (ret)
			pr_err("Unable to create 'interrupt' sysfs file\n");

		ret = sysfs_create_file(hf_sysfs_obj, &send_attr.attr);
		if (ret)
			pr_err("Unable to create 'send' sysfs file\n");
	}
}

/**
 * Initializes the Hafnium driver by creating a thread for each vCPU of each
 * virtual machine.
 */
static int __init hf_init(void)
{
	int64_t ret;
	uint32_t i, j;
	uint32_t total_vm_count;
	uint32_t total_vcpu_count;

	/* Allocate a page for send and receive buffers. */
	hf_send_page = alloc_page(GFP_KERNEL);
	if (!hf_send_page) {
		pr_err("Unable to allocate send buffer\n");
		return -ENOMEM;
	}

	hf_recv_page = alloc_page(GFP_KERNEL);
	if (!hf_recv_page) {
		__free_page(hf_send_page);
		pr_err("Unable to allocate receive buffer\n");
		return -ENOMEM;
	}

	/*
	 * Configure both addresses. Once configured, we cannot free these
	 * pages because the hypervisor will use them, even if the module is
	 * unloaded.
	 */
	ret = hf_vm_configure(page_to_phys(hf_send_page),
			      page_to_phys(hf_recv_page));
	if (ret) {
		__free_page(hf_send_page);
		__free_page(hf_recv_page);
		/* TODO: We may want to grab this information from hypervisor
		 * and go from there. */
		pr_err("Unable to configure VM\n");
		return -EIO;
	}

	/* Get the number of VMs. */
	ret = hf_vm_get_count();
	if (ret < 0) {
		pr_err("Unable to retrieve number of VMs: %lld\n", ret);
		return -EIO;
	}

	/* Confirm the maximum number of VMs looks sane. */
	BUILD_BUG_ON(CONFIG_HAFNIUM_MAX_VMS < 1);
	BUILD_BUG_ON(CONFIG_HAFNIUM_MAX_VMS > U16_MAX);

	/* Validate the number of VMs. There must at least be the primary. */
	if (ret < 1 || ret > CONFIG_HAFNIUM_MAX_VMS) {
		pr_err("Number of VMs is out of range: %lld\n", ret);
		return -EDQUOT;
	}

	/* Only track the secondary VMs. */
	total_vm_count = ret - 1;
	hf_vms = kmalloc_array(total_vm_count, sizeof(struct hf_vm),
			       GFP_KERNEL);
	if (!hf_vms)
		return -ENOMEM;

	/* Initialize each VM. */
	total_vcpu_count = 0;
	for (i = 0; i < total_vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];

		/* Adjust the ID as only the secondaries are tracked. */
		vm->id = i + 1;

		ret = hf_vcpu_get_count(vm->id);
		if (ret < 0) {
			pr_err("HF_VCPU_GET_COUNT failed for vm=%u: %lld\n",
			       vm->id, ret);
			ret = -EIO;
			goto fail_with_cleanup;
		}

		/* Avoid overflowing the vcpu count. */
		if (ret > (U32_MAX - total_vcpu_count)) {
			pr_err("Too many vcpus: %u\n", total_vcpu_count);
			ret = -EDQUOT;
			goto fail_with_cleanup;
		}

		/* Confirm the maximum number of VCPUs looks sane. */
		BUILD_BUG_ON(CONFIG_HAFNIUM_MAX_VCPUS < 1);
		BUILD_BUG_ON(CONFIG_HAFNIUM_MAX_VCPUS > U16_MAX);

		/* Enforce the limit on vcpus. */
		total_vcpu_count += ret;
		if (total_vcpu_count > CONFIG_HAFNIUM_MAX_VCPUS) {
			pr_err("Too many vcpus: %u\n", total_vcpu_count);
			ret = -EDQUOT;
			goto fail_with_cleanup;
		}

		vm->vcpu_count = ret;
		vm->vcpu = kmalloc_array(vm->vcpu_count,
					 sizeof(struct hf_vcpu), GFP_KERNEL);
		if (!vm->vcpu) {
			pr_err("No memory for %u vcpus for vm %u\n",
			       vm->vcpu_count, vm->id);
			ret = -ENOMEM;
			goto fail_with_cleanup;
		}

		/* Update the number of initialized VMs. */
		hf_vm_count = i + 1;

		/* Create a kernel thread for each vcpu. */
		for (j = 0; j < vm->vcpu_count; j++) {
			struct hf_vcpu *vcpu = &vm->vcpu[j];
			vcpu->task = kthread_create(hf_vcpu_thread, vcpu,
						    "vcpu_thread_%u_%u",
						    vm->id, j);
			if (IS_ERR(vcpu->task)) {
				pr_err("Error creating task (vm=%u,vcpu=%u): %ld\n",
				       vm->id, j, PTR_ERR(vcpu->task));
				vm->vcpu_count = j;
				ret = PTR_ERR(vcpu->task);
				goto fail_with_cleanup;
			}

			get_task_struct(vcpu->task);
			spin_lock_init(&vcpu->lock);
			vcpu->vm = vm;
			vcpu->vcpu_index = j;
			vcpu->pending_irq = false;
		}
	}

	/* Start running threads now that all is initialized. */
	for (i = 0; i < hf_vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];
		for (j = 0; j < vm->vcpu_count; j++)
			wake_up_process(vm->vcpu[j].task);
	}

	/* Dump vm/vcpu count info. */
	pr_info("Hafnium successfully loaded with %u VMs:\n", hf_vm_count);
	for (i = 0; i < hf_vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];
		pr_info("\tVM %u: %u vCPUs\n", vm->id, vm->vcpu_count);
	}

	hf_init_sysfs();

	return 0;

fail_with_cleanup:
	hf_free_resources();
	return ret;
}

/**
 * Frees up all resources used by the Hafnium driver in preparation for
 * unloading it.
 */
static void __exit hf_exit(void)
{
	if (hf_sysfs_obj)
		kobject_put(hf_sysfs_obj);

	pr_info("Preparing to unload Hafnium\n");
	hf_free_resources();
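	/*
	 * Note: hf_send_page and hf_recv_page are deliberately not freed;
	 * once passed to hf_vm_configure() the hypervisor keeps using them
	 * (see the comment in hf_init()).
	 */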
	pr_info("Hafnium ready to unload\n");
}

MODULE_LICENSE("GPL");

module_init(hf_init);
module_exit(hf_exit);