#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched/task.h>
#include <linux/slab.h>

#include <hf/call.h>

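/**
 * Per-vcpu state maintained by the driver: the kernel thread that runs the
 * vcpu, the timer used to wake it from a requested sleep, and a flag
 * recording that an interrupt injection was requested via sysfs.
 */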
struct hf_vcpu {
	spinlock_t lock;
	struct hf_vm *vm;
	uint32_t vcpu_index;
	struct task_struct *task;
	struct hrtimer timer;
	bool pending_irq;
};

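/**
 * Per-VM state: the VM's id as reported by the hypervisor and the array of
 * vcpus backing it.
 */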
struct hf_vm {
	uint32_t id;
	uint32_t vcpu_count;
	struct hf_vcpu *vcpu;
};

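/*
 * Module-wide state: the secondary VMs managed by this driver, plus one page
 * each for the send and receive RPC buffers shared with the hypervisor
 * (allocated and configured once in hf_init(); see the note there on why
 * they are never freed).
 */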
static struct hf_vm *hf_vms;
static uint32_t hf_vm_count;
static struct page *hf_send_page = NULL;
static struct page *hf_recv_page = NULL;

/**
 * Wakes up the thread associated with the vcpu that owns the given timer. This
 * is called when the timer the thread is waiting on expires.
 */
static enum hrtimer_restart hf_vcpu_timer_expired(struct hrtimer *timer)
{
	struct hf_vcpu *vcpu = container_of(timer, struct hf_vcpu, timer);
	wake_up_process(vcpu->task);
	return HRTIMER_NORESTART;
}

/**
 * This is the main loop of each vcpu.
 */
static int hf_vcpu_thread(void *data)
{
	struct hf_vcpu *vcpu = data;
	int64_t ret;

	hrtimer_init(&vcpu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->timer.function = &hf_vcpu_timer_expired;

	while (!kthread_should_stop()) {
		unsigned long flags;

		set_current_state(TASK_RUNNING);

		/*
		 * Determine if we must interrupt the vcpu. Note that actual
		 * delivery of the interrupt to the vcpu is still a TODO, so
		 * for now the pending flag is simply consumed here.
		 */
		spin_lock_irqsave(&vcpu->lock, flags);
		vcpu->pending_irq = false;
		spin_unlock_irqrestore(&vcpu->lock, flags);

		/* Call into Hafnium to run vcpu. */
		ret = hf_vcpu_run(vcpu->vm->id, vcpu->vcpu_index);

		/*
		 * A negative return value indicates that this vcpu needs to
		 * sleep for the given number of nanoseconds.
		 */
		if (ret < 0) {
			set_current_state(TASK_INTERRUPTIBLE);
			if (kthread_should_stop())
				break;
			hrtimer_start(&vcpu->timer, -ret, HRTIMER_MODE_REL);
			schedule();
			hrtimer_cancel(&vcpu->timer);
			continue;
		}

		switch (HF_VCPU_RUN_CODE(ret)) {
		/* Yield (forcibly or voluntarily). */
		case HF_VCPU_RUN_YIELD:
			break;

		/* WFI. */
		case HF_VCPU_RUN_WAIT_FOR_INTERRUPT:
			set_current_state(TASK_INTERRUPTIBLE);
			if (kthread_should_stop())
				break;
			schedule();
			break;

		/* Wake up another vcpu. */
		case HF_VCPU_RUN_WAKE_UP:
		{
			int32_t target = HF_VCPU_RUN_DATA(ret);
			struct hf_vm *vm = vcpu->vm;
			if (target < vm->vcpu_count)
				wake_up_process(vm->vcpu[target].task);
		}
		break;

		/* Response available. */
		case HF_VCPU_RUN_RESPONSE_READY:
		{
			size_t i, count = HF_VCPU_RUN_DATA(ret);
			const char *buf = page_address(hf_recv_page);
			pr_info("Received response (%zu bytes): ", count);
			for (i = 0; i < count; i++)
				printk(KERN_CONT "%c", buf[i]);
			printk(KERN_CONT "\n");
			hf_rpc_ack();
		}
		break;
		}
	}

	set_current_state(TASK_RUNNING);

	return 0;
}

/**
 * Frees all resources, including threads, associated with the Hafnium driver.
 */
static void hf_free_resources(uint32_t vm_count)
{
	uint32_t i, j;

	/*
	 * First stop all worker threads. We need to do this before freeing
	 * resources because workers may reference each other, so it is only
	 * safe to free resources after they have all stopped.
	 */
	for (i = 0; i < vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];
		for (j = 0; j < vm->vcpu_count; j++)
			kthread_stop(vm->vcpu[j].task);
	}

	/* Free resources. */
	for (i = 0; i < vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];
		for (j = 0; j < vm->vcpu_count; j++)
			put_task_struct(vm->vcpu[j].task);
		kfree(vm->vcpu);
	}

	kfree(hf_vms);
}

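/**
 * Handles writes to the "interrupt" sysfs file by marking an interrupt as
 * pending for a vcpu (currently hard-coded to vcpu 0 of the first VM; see
 * the TODOs below) and waking the thread that runs it.
 */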
static ssize_t hf_interrupt_store(struct kobject *kobj,
				  struct kobj_attribute *attr, const char *buf,
				  size_t count)
{
	struct hf_vcpu *vcpu;
	unsigned long flags;
	struct task_struct *task;

	/* TODO: Parse input to determine which vcpu to interrupt. */
	/* TODO: Check bounds. */

	vcpu = &hf_vms[0].vcpu[0];

	spin_lock_irqsave(&vcpu->lock, flags);
	vcpu->pending_irq = true;
	/* TODO: Do we need to increment the task's ref count here? */
	task = vcpu->task;
	spin_unlock_irqrestore(&vcpu->lock, flags);

	/* Wake up the task. If it's already running, kick it out. */
	/*
	 * TODO: There's a race here: the kick may happen right before we go
	 * to the hypervisor.
	 */
	if (wake_up_process(task) == 0)
		kick_process(task);

	return count;
}

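/**
 * Handles writes to the "send" sysfs file by copying the written bytes into
 * the shared send buffer and issuing an RPC request to the first VM. The
 * hypervisor's return value names the vcpu that will service the request, or
 * equals the vcpu count when none is currently waiting for data.
 */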
static ssize_t hf_send_store(struct kobject *kobj, struct kobj_attribute *attr,
			     const char *buf, size_t count)
{
	int64_t ret;
	struct hf_vm *vm;

	count = min_t(size_t, count, HF_RPC_REQUEST_MAX_SIZE);

	/* Copy data to send buffer. */
	memcpy(page_address(hf_send_page), buf, count);

	vm = &hf_vms[0];
	ret = hf_rpc_request(vm->id, count);
	if (ret < 0)
		return -EAGAIN;

	if (ret > vm->vcpu_count)
		return -EINVAL;

	if (ret == vm->vcpu_count) {
		/*
		 * TODO: We need to interrupt some vcpu because none is
		 * actually waiting for data.
		 */
	} else {
		/* Wake up the vcpu that is going to process the data. */
		/*
		 * TODO: There's a race where the thread may get woken up
		 * before it goes to sleep. Fix this.
		 */
		wake_up_process(vm->vcpu[ret].task);
	}

	return count;
}

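/*
 * Write-only sysfs files, created under /sys/kernel/hafnium/ by
 * hf_init_sysfs(): "interrupt" requests an interrupt on a vcpu and "send"
 * submits an RPC request.
 */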
static struct kobject *hf_sysfs_obj = NULL;
static struct kobj_attribute interrupt_attr =
	__ATTR(interrupt, 0200, NULL, hf_interrupt_store);
static struct kobj_attribute send_attr =
	__ATTR(send, 0200, NULL, hf_send_store);

/**
 * Initializes the Hafnium driver's sysfs interface.
 */
static void __init hf_init_sysfs(void)
{
	int ret;

	/* Create the sysfs interface to interrupt vcpus. */
	hf_sysfs_obj = kobject_create_and_add("hafnium", kernel_kobj);
	if (!hf_sysfs_obj) {
		pr_err("Unable to create sysfs object\n");
	} else {
		ret = sysfs_create_file(hf_sysfs_obj, &interrupt_attr.attr);
		if (ret)
			pr_err("Unable to create 'interrupt' sysfs file\n");

		ret = sysfs_create_file(hf_sysfs_obj, &send_attr.attr);
		if (ret)
			pr_err("Unable to create 'send' sysfs file\n");
	}
}
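
/*
 * Example usage from userspace, assuming the module is loaded and the files
 * above were created successfully (payloads are illustrative; the
 * "interrupt" handler does not yet parse its input):
 *
 *   # Request an interrupt on vcpu 0 of the first VM.
 *   echo 1 > /sys/kernel/hafnium/interrupt
 *
 *   # Send a 5-byte RPC request to the first VM; the response is printed to
 *   # the kernel log by the vcpu thread that receives it.
 *   echo -n hello > /sys/kernel/hafnium/send
 */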

/**
 * Initializes the Hafnium driver by creating a thread for each vCPU of each
 * virtual machine.
 */
static int __init hf_init(void)
{
	int64_t ret;
	uint32_t i, j;

	/* Allocate a page for send and receive buffers. */
	hf_send_page = alloc_page(GFP_KERNEL);
	if (!hf_send_page) {
		pr_err("Unable to allocate send buffer\n");
		return -ENOMEM;
	}

	hf_recv_page = alloc_page(GFP_KERNEL);
	if (!hf_recv_page) {
		__free_page(hf_send_page);
		pr_err("Unable to allocate receive buffer\n");
		return -ENOMEM;
	}

	/*
	 * Configure both addresses. Once configured, we cannot free these
	 * pages because the hypervisor will use them, even if the module is
	 * unloaded.
	 */
	ret = hf_vm_configure(page_to_phys(hf_send_page),
			      page_to_phys(hf_recv_page));
	if (ret) {
		__free_page(hf_send_page);
		__free_page(hf_recv_page);
		/*
		 * TODO: We may want to grab this information from the
		 * hypervisor and go from there.
		 */
		pr_err("Unable to configure VM\n");
		return -EIO;
	}

	/* Get the number of VMs and allocate storage for them. */
	ret = hf_vm_get_count();
	if (ret < 1) {
		pr_err("Unable to retrieve number of VMs: %lld\n", ret);
		return ret < 0 ? ret : -EINVAL;
	}

	/* Only track the secondary VMs. */
	hf_vm_count = ret - 1;
	hf_vms = kmalloc(sizeof(struct hf_vm) * hf_vm_count, GFP_KERNEL);
	if (!hf_vms)
		return -ENOMEM;

	/* Initialize each VM. */
	for (i = 0; i < hf_vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];

		/* Adjust the ID as only the secondaries are tracked. */
		vm->id = i + 1;

		ret = hf_vcpu_get_count(vm->id);
		if (ret < 0) {
			pr_err("HF_VCPU_GET_COUNT failed for vm=%u: %lld\n",
			       vm->id, ret);
			hf_free_resources(i);
			return ret;
		}

		vm->vcpu_count = ret;
		vm->vcpu = kmalloc(sizeof(struct hf_vcpu) * vm->vcpu_count,
				   GFP_KERNEL);
		if (!vm->vcpu) {
			pr_err("No memory for %u vcpus for vm %u\n",
			       vm->vcpu_count, vm->id);
			hf_free_resources(i);
			return -ENOMEM;
		}

		/* Create a kernel thread for each vcpu. */
		for (j = 0; j < vm->vcpu_count; j++) {
			struct hf_vcpu *vcpu = &vm->vcpu[j];
			vcpu->task = kthread_create(hf_vcpu_thread, vcpu,
						    "vcpu_thread_%u_%u",
						    vm->id, j);
			if (IS_ERR(vcpu->task)) {
				pr_err("Error creating task (vm=%u,vcpu=%u): %ld\n",
				       vm->id, j, PTR_ERR(vcpu->task));
				vm->vcpu_count = j;
				hf_free_resources(i + 1);
				return PTR_ERR(vcpu->task);
			}

			get_task_struct(vcpu->task);
			spin_lock_init(&vcpu->lock);
			vcpu->vm = vm;
			vcpu->vcpu_index = j;
			vcpu->pending_irq = false;
		}
	}

	/* Start running threads now that all is initialized. */
	for (i = 0; i < hf_vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];
		for (j = 0; j < vm->vcpu_count; j++)
			wake_up_process(vm->vcpu[j].task);
	}

	/* Dump vm/vcpu count info. */
	pr_info("Hafnium successfully loaded with %u VMs:\n", hf_vm_count);
	for (i = 0; i < hf_vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];
		pr_info("\tVM %u: %u vCPUS\n", vm->id, vm->vcpu_count);
	}

	hf_init_sysfs();

	return 0;
}

/**
 * Frees up all resources used by the Hafnium driver in preparation for
 * unloading it.
 */
static void __exit hf_exit(void)
{
	if (hf_sysfs_obj)
		kobject_put(hf_sysfs_obj);

	pr_info("Preparing to unload Hafnium\n");
	hf_free_resources(hf_vm_count);
	pr_info("Hafnium ready to unload\n");
}

MODULE_LICENSE("GPL");

module_init(hf_init);
module_exit(hf_exit);