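/*
 * Hafnium hypervisor driver. Runs in the primary VM and drives the secondary
 * VMs: it creates a kernel thread per secondary vCPU, shares a pair of
 * mailbox pages with the hypervisor, and exposes a small sysfs interface for
 * interrupting vCPUs and sending messages.
 */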
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched/task.h>
#include <linux/slab.h>

#include <hf/call.h>

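/** Per-vCPU state: the kernel thread that drives the vCPU plus its timer. */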
struct hf_vcpu {
	spinlock_t lock;
	struct hf_vm *vm;
	uint32_t vcpu_index;
	struct task_struct *task;
	struct hrtimer timer;
	bool pending_irq;
};

/** State of a secondary VM: its id and the vCPUs that back it. */
struct hf_vm {
	uint32_t id;
	uint32_t vcpu_count;
	struct hf_vcpu *vcpu;
};

/* Secondary VMs only; the primary VM is the one running this kernel. */
static struct hf_vm *hf_vms;
static uint32_t hf_vm_count;

/* One page per direction of the mailbox shared with the hypervisor. */
static struct page *hf_send_page = NULL;
static struct page *hf_recv_page = NULL;

/**
 * Wakes up the thread associated with the vcpu that owns the given timer. This
 * is called when the timer the thread is waiting on expires.
 */
static enum hrtimer_restart hf_vcpu_timer_expired(struct hrtimer *timer)
{
	struct hf_vcpu *vcpu = container_of(timer, struct hf_vcpu, timer);
	wake_up_process(vcpu->task);
	return HRTIMER_NORESTART;
}

/**
 * This is the main loop of each vcpu.
 */
static int hf_vcpu_thread(void *data)
{
	struct hf_vcpu *vcpu = data;
	int64_t ret;

	hrtimer_init(&vcpu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->timer.function = &hf_vcpu_timer_expired;

	while (!kthread_should_stop()) {
		unsigned long flags;

		set_current_state(TASK_RUNNING);

		/*
		 * Consume any pending interrupt request.
		 * TODO: Forward it to the hypervisor instead of just clearing
		 * the flag.
		 */
		spin_lock_irqsave(&vcpu->lock, flags);
		vcpu->pending_irq = false;
		spin_unlock_irqrestore(&vcpu->lock, flags);

		/* Call into Hafnium to run vcpu. */
		ret = hf_vcpu_run(vcpu->vm->id, vcpu->vcpu_index);

		/*
		 * A negative return value indicates that this vcpu needs to
		 * sleep for the given number of nanoseconds: for example,
		 * ret == -1000000 asks for a 1ms nap before the next run.
		 */
		if (ret < 0) {
			set_current_state(TASK_INTERRUPTIBLE);
			if (kthread_should_stop())
				break;
			hrtimer_start(&vcpu->timer, -ret, HRTIMER_MODE_REL);
			schedule();
			hrtimer_cancel(&vcpu->timer);
			continue;
		}

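		/*
		 * A non-negative return value packs a run code plus optional
		 * data (a VM id, a vCPU index or a byte count); the
		 * HF_VCPU_RUN_* macros below extract the pieces.
		 */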
		switch (HF_VCPU_RUN_CODE(ret)) {
		/* Yield (forcibly or voluntarily). */
		case HF_VCPU_RUN_YIELD:
			break;

		/* WFI. */
		case HF_VCPU_RUN_WAIT_FOR_INTERRUPT:
			set_current_state(TASK_INTERRUPTIBLE);
			if (kthread_should_stop())
				break;
			schedule();
			break;

		/* Wake up another vcpu. */
		case HF_VCPU_RUN_WAKE_UP:
		{
			uint32_t vm_id = HF_VCPU_RUN_VM_ID(ret);
			uint32_t target = HF_VCPU_RUN_DATA(ret);
			struct hf_vm *vm;

			/* Secondary VM ids start at 1. */
			if (vm_id < 1 || vm_id > hf_vm_count)
				break;
			vm = &hf_vms[vm_id - 1];
			if (target < vm->vcpu_count) {
				wake_up_process(vm->vcpu[target].task);
			} else if (target == HF_INVALID_VCPU) {
				/* TODO: pick one to interrupt. */
				pr_warn("No vcpu to wake.");
			}
		}
			break;

		/* Response available. */
		case HF_VCPU_RUN_MESSAGE:
		{
			size_t i;
			uint32_t count = HF_VCPU_RUN_DATA(ret);
			const char *buf = page_address(hf_recv_page);

			/* Don't read past the end of the mailbox page. */
			count = min_t(uint32_t, count, HF_MAILBOX_SIZE);
			pr_info("Received response from vm %u (%u bytes): ",
				vcpu->vm->id, count);
			for (i = 0; i < count; i++)
				printk(KERN_CONT "%c", buf[i]);
			printk(KERN_CONT "\n");
			hf_mailbox_clear();
		}
			break;
		}
	}

	set_current_state(TASK_RUNNING);

	return 0;
}

/**
 * Frees all resources, including threads, associated with the Hafnium driver.
 */
static void hf_free_resources(uint32_t vm_count)
{
	uint32_t i, j;

	/*
	 * First stop all worker threads. We need to do this before freeing
	 * resources because workers may reference each other, so it is only
	 * safe to free resources after they have all stopped.
	 */
	for (i = 0; i < vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];
		for (j = 0; j < vm->vcpu_count; j++)
			kthread_stop(vm->vcpu[j].task);
	}

	/* Free resources. */
	for (i = 0; i < vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];
		for (j = 0; j < vm->vcpu_count; j++)
			put_task_struct(vm->vcpu[j].task);
		kfree(vm->vcpu);
	}

	kfree(hf_vms);
}

static ssize_t hf_interrupt_store(struct kobject *kobj,
				  struct kobj_attribute *attr, const char *buf,
				  size_t count)
{
	struct hf_vcpu *vcpu;
	unsigned long flags;
	struct task_struct *task;

	/* TODO: Parse input to determine which vcpu to interrupt. */
	/* TODO: Check bounds. */

	vcpu = &hf_vms[0].vcpu[0];

	spin_lock_irqsave(&vcpu->lock, flags);
	vcpu->pending_irq = true;
	/* TODO: Do we need to increment the task's ref count here? */
	task = vcpu->task;
	spin_unlock_irqrestore(&vcpu->lock, flags);

	/* Wake up the task. If it's already running, kick it out. */
	/*
	 * TODO: There's a race here: the kick may happen right before we go
	 * to the hypervisor.
	 */
	if (wake_up_process(task) == 0)
		kick_process(task);

	return count;
}

static ssize_t hf_send_store(struct kobject *kobj, struct kobj_attribute *attr,
			     const char *buf, size_t count)
{
	int64_t ret;
	struct hf_vm *vm;

	count = min_t(size_t, count, HF_MAILBOX_SIZE);

	/* Copy data to send buffer. */
	memcpy(page_address(hf_send_page), buf, count);

	vm = &hf_vms[0];
	ret = hf_mailbox_send(vm->id, count);
	if (ret < 0)
		return -EAGAIN;

	if (ret == HF_INVALID_VCPU) {
		/*
		 * TODO: We need to interrupt some vcpu because none are
		 * waiting for data.
		 */
		pr_warn("No vcpu to receive message.");
		return -ENOSYS;
	}

	if (ret >= vm->vcpu_count)
		return -EINVAL;

	/* Wake up the vcpu that is going to process the data. */
	/*
	 * TODO: There's a race where the thread may be woken up before it
	 * goes to sleep. Fix this.
	 */
	wake_up_process(vm->vcpu[ret].task);

	return count;
}

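/*
 * Sysfs interface: the kobject below appears as /sys/kernel/hafnium, and the
 * attributes as the root-writable files /sys/kernel/hafnium/interrupt and
 * /sys/kernel/hafnium/send, e.g.:
 *
 *   echo -n hello > /sys/kernel/hafnium/send
 */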
static struct kobject *hf_sysfs_obj = NULL;
static struct kobj_attribute interrupt_attr =
	__ATTR(interrupt, 0200, NULL, hf_interrupt_store);
static struct kobj_attribute send_attr =
	__ATTR(send, 0200, NULL, hf_send_store);

/**
 * Initializes the Hafnium driver's sysfs interface.
 */
static void __init hf_init_sysfs(void)
{
	int ret;

	/* Create the sysfs interface to interrupt vcpus. */
	hf_sysfs_obj = kobject_create_and_add("hafnium", kernel_kobj);
	if (!hf_sysfs_obj) {
		pr_err("Unable to create sysfs object");
	} else {
		ret = sysfs_create_file(hf_sysfs_obj, &interrupt_attr.attr);
		if (ret)
			pr_err("Unable to create 'interrupt' sysfs file");

		ret = sysfs_create_file(hf_sysfs_obj, &send_attr.attr);
		if (ret)
			pr_err("Unable to create 'send' sysfs file");
	}
}

/**
 * Initializes the Hafnium driver by creating a thread for each vCPU of each
 * virtual machine.
 */
static int __init hf_init(void)
{
	int64_t ret;
	uint32_t i, j;

	/* Allocate a page for send and receive buffers. */
	hf_send_page = alloc_page(GFP_KERNEL);
	if (!hf_send_page) {
		pr_err("Unable to allocate send buffer\n");
		return -ENOMEM;
	}

	hf_recv_page = alloc_page(GFP_KERNEL);
	if (!hf_recv_page) {
		__free_page(hf_send_page);
		pr_err("Unable to allocate receive buffer\n");
		return -ENOMEM;
	}

	/*
	 * Configure both addresses. Once configured, we cannot free these
	 * pages because the hypervisor will use them, even if the module is
	 * unloaded.
	 */
	ret = hf_vm_configure(page_to_phys(hf_send_page),
			      page_to_phys(hf_recv_page));
	if (ret) {
		__free_page(hf_send_page);
		__free_page(hf_recv_page);
		/*
		 * TODO: We may want to grab this information from the
		 * hypervisor and go from there.
		 */
		pr_err("Unable to configure VM\n");
		return -EIO;
	}

	/* Get the number of VMs and allocate storage for them. */
	ret = hf_vm_get_count();
	if (ret < 1) {
		pr_err("Unable to retrieve number of VMs: %lld\n", ret);
		return ret < 0 ? ret : -EIO;
	}

	/* Only track the secondary VMs; the primary VM runs this kernel. */
	hf_vm_count = ret - 1;
	hf_vms = kmalloc_array(hf_vm_count, sizeof(struct hf_vm), GFP_KERNEL);
	if (!hf_vms)
		return -ENOMEM;

	/* Initialize each VM. */
	for (i = 0; i < hf_vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];

		/* Adjust the ID as only the secondaries are tracked. */
		vm->id = i + 1;

		ret = hf_vcpu_get_count(vm->id);
		if (ret < 0) {
			pr_err("HF_VCPU_GET_COUNT failed for vm=%u: %lld",
			       vm->id, ret);
			hf_free_resources(i);
			return ret;
		}

		vm->vcpu_count = ret;
		vm->vcpu = kmalloc_array(vm->vcpu_count,
					 sizeof(struct hf_vcpu), GFP_KERNEL);
		if (!vm->vcpu) {
			pr_err("No memory for %u vcpus for vm %u",
			       vm->vcpu_count, vm->id);
			hf_free_resources(i);
			return -ENOMEM;
		}

		/* Create a kernel thread for each vcpu. */
		for (j = 0; j < vm->vcpu_count; j++) {
			struct hf_vcpu *vcpu = &vm->vcpu[j];
			vcpu->task = kthread_create(hf_vcpu_thread, vcpu,
						    "vcpu_thread_%u_%u",
						    vm->id, j);
			if (IS_ERR(vcpu->task)) {
				pr_err("Error creating task (vm=%u,vcpu=%u): %ld\n",
				       vm->id, j, PTR_ERR(vcpu->task));
				vm->vcpu_count = j;
				hf_free_resources(i + 1);
				return PTR_ERR(vcpu->task);
			}

			get_task_struct(vcpu->task);
			spin_lock_init(&vcpu->lock);
			vcpu->vm = vm;
			vcpu->vcpu_index = j;
			vcpu->pending_irq = false;
		}
	}

	/* Start running threads now that all is initialized. */
	for (i = 0; i < hf_vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];
		for (j = 0; j < vm->vcpu_count; j++)
			wake_up_process(vm->vcpu[j].task);
	}

	/* Dump vm/vcpu count info. */
	pr_info("Hafnium successfully loaded with %u VMs:\n", hf_vm_count);
	for (i = 0; i < hf_vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];
		pr_info("\tVM %u: %u vCPUs\n", vm->id, vm->vcpu_count);
	}

	hf_init_sysfs();

	return 0;
}

/**
 * Frees up all resources used by the Hafnium driver in preparation for
 * unloading it.
 */
static void __exit hf_exit(void)
{
	/* Remove the sysfs interface first so no new requests can race with
	 * teardown. */
	if (hf_sysfs_obj)
		kobject_put(hf_sysfs_obj);

	pr_info("Preparing to unload Hafnium\n");
	hf_free_resources(hf_vm_count);
	pr_info("Hafnium ready to unload\n");
}

MODULE_LICENSE("GPL");
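MODULE_DESCRIPTION("Hafnium hypervisor driver");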

module_init(hf_init);
module_exit(hf_exit);