#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched/task.h>
#include <linux/slab.h>

#include <hf/call.h>

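/**
 * Per-vcpu state: the kernel thread that runs the vcpu, the timer used for
 * timed sleeps, and whether an interrupt injection is pending.
 */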
struct hf_vcpu {
	spinlock_t lock;
	uint32_t vm_index;
	uint32_t vcpu_index;
	struct task_struct *task;
	struct hrtimer timer;
	bool pending_irq;
};

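/**
 * Per-VM state: the number of vcpus and the array holding them.
 */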
struct hf_vm {
	long vcpu_count;
	struct hf_vcpu *vcpu;
};

static struct hf_vm *hf_vms;
static long hf_vm_count;
static struct page *hf_send_page = NULL;
static struct page *hf_recv_page = NULL;

/**
 * Wakes up the thread associated with the vcpu that owns the given timer. This
 * is called when the timer the thread is waiting on expires.
 */
static enum hrtimer_restart hf_vcpu_timer_expired(struct hrtimer *timer)
{
	struct hf_vcpu *vcpu = container_of(timer, struct hf_vcpu, timer);
	wake_up_process(vcpu->task);
	return HRTIMER_NORESTART;
}

/**
 * This is the main loop of each vcpu.
 */
static int hf_vcpu_thread(void *data)
{
	struct hf_vcpu *vcpu = data;
	long ret;

	hrtimer_init(&vcpu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->timer.function = &hf_vcpu_timer_expired;

	while (!kthread_should_stop()) {
		unsigned long flags;
		size_t irqs;

		set_current_state(TASK_RUNNING);

		/* Determine if we must interrupt the vcpu. */
		spin_lock_irqsave(&vcpu->lock, flags);
		irqs = vcpu->pending_irq ? 1 : 0;
		vcpu->pending_irq = false;
		spin_unlock_irqrestore(&vcpu->lock, flags);
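		/* TODO: irqs is computed above but never consumed;
		 * hf_vcpu_run() does not take it yet. */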

		/* Call into hafnium to run vcpu. */
		ret = hf_vcpu_run(vcpu->vm_index, vcpu->vcpu_index);

		/*
		 * A negative return value indicates that this vcpu needs to
		 * sleep for the given number of nanoseconds.
		 */
		if (ret < 0) {
			set_current_state(TASK_INTERRUPTIBLE);
			if (kthread_should_stop())
				break;
			hrtimer_start(&vcpu->timer, -ret, HRTIMER_MODE_REL);
			schedule();
			hrtimer_cancel(&vcpu->timer);
			continue;
		}

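		/*
		 * hf_vcpu_run() packs a reason code and optional data into its
		 * return value; HF_VCPU_RUN_CODE() and HF_VCPU_RUN_DATA()
		 * extract them.
		 */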
		switch (HF_VCPU_RUN_CODE(ret)) {
		/* Yield (forcibly or voluntarily). */
		case HF_VCPU_RUN_YIELD:
			break;

		/* WFI. */
		case HF_VCPU_RUN_WAIT_FOR_INTERRUPT:
			set_current_state(TASK_INTERRUPTIBLE);
			if (kthread_should_stop())
				break;
			schedule();
			break;

		/* Wake up another vcpu. */
		case HF_VCPU_RUN_WAKE_UP:
		{
			struct hf_vm *vm = &hf_vms[vcpu->vm_index];
			long target = HF_VCPU_RUN_DATA(ret);
			if (target < vm->vcpu_count)
				wake_up_process(vm->vcpu[target].task);
		}
			break;

		/* Response available. */
		case HF_VCPU_RUN_RESPONSE_READY:
		{
			size_t i, count = HF_VCPU_RUN_DATA(ret);
			const char *buf = page_address(hf_recv_page);
			pr_info("Received response (%zu bytes): ", count);
			for (i = 0; i < count; i++)
				printk(KERN_CONT "%c", buf[i]);
			printk(KERN_CONT "\n");
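			/* Acknowledge receipt of the response. */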
			hf_rpc_ack();
		}
			break;
		}
	}

	set_current_state(TASK_RUNNING);

	return 0;
}

/**
 * Frees all resources, including threads, associated with the hafnium driver.
 */
static void hf_free_resources(long vm_count)
{
	long i, j;

	/*
	 * First stop all worker threads. We need to do this before freeing
	 * resources because workers may reference each other, so it is only
	 * safe to free resources after they have all stopped.
	 */
	for (i = 0; i < vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];
		for (j = 0; j < vm->vcpu_count; j++)
			kthread_stop(vm->vcpu[j].task);
	}

	/* Free resources. */
	for (i = 0; i < vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];
		for (j = 0; j < vm->vcpu_count; j++)
			put_task_struct(vm->vcpu[j].task);
		kfree(vm->vcpu);
	}

	kfree(hf_vms);
}

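/**
 * Handles writes to the "interrupt" sysfs file by marking an interrupt as
 * pending for a vcpu and waking up the thread running it. For now the target
 * is always vcpu 0 of VM 0.
 */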
static ssize_t hf_interrupt_store(struct kobject *kobj,
				  struct kobj_attribute *attr, const char *buf,
				  size_t count)
{
	struct hf_vcpu *vcpu;
	unsigned long flags;
	struct task_struct *task;

	/* TODO: Parse input to determine which vcpu to interrupt. */
	/* TODO: Check bounds. */

	vcpu = &hf_vms[0].vcpu[0];

	spin_lock_irqsave(&vcpu->lock, flags);
	vcpu->pending_irq = true;
	/* TODO: Do we need to increment the task's ref count here? */
	task = vcpu->task;
	spin_unlock_irqrestore(&vcpu->lock, flags);

	/* Wake up the task. If it's already running, kick it out. */
	/*
	 * TODO: There's a race here: the kick may happen right before we go
	 * to the hypervisor.
	 */
	if (wake_up_process(task) == 0)
		kick_process(task);

	return count;
}

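/**
 * Handles writes to the "send" sysfs file by copying the data into the shared
 * send buffer and issuing an RPC request to VM 0. On success, the hypervisor
 * returns the index of the vcpu that will service the request, or the vcpu
 * count if none is currently waiting for it.
 */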
static ssize_t hf_send_store(struct kobject *kobj, struct kobj_attribute *attr,
			     const char *buf, size_t count)
{
	long ret;
	struct hf_vm *vm;

	count = min_t(size_t, count, HF_RPC_REQUEST_MAX_SIZE);

	/* Copy data to send buffer. */
	memcpy(page_address(hf_send_page), buf, count);
	ret = hf_rpc_request(0, count);
	if (ret < 0)
		return -EAGAIN;

	vm = &hf_vms[0];
	if (ret > vm->vcpu_count)
		return -EINVAL;

	if (ret == vm->vcpu_count) {
		/*
		 * TODO: We need to interrupt some CPU because none is actually
		 * waiting for data.
		 */
	} else {
		/* Wake up the vcpu that is going to process the data. */
		/*
		 * TODO: There's a race where the thread may be woken up before
		 * it goes to sleep. Fix this.
		 */
		wake_up_process(vm->vcpu[ret].task);
	}

	return count;
}

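/* Sysfs interface: /sys/kernel/hafnium/{interrupt,send}, both write-only. */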
static struct kobject *hf_sysfs_obj = NULL;
static struct kobj_attribute interrupt_attr =
	__ATTR(interrupt, 0200, NULL, hf_interrupt_store);
static struct kobj_attribute send_attr =
	__ATTR(send, 0200, NULL, hf_send_store);

/**
 * Initializes the hafnium driver by creating a thread for each vCPU of each
 * virtual machine.
 */
static int __init hf_init(void)
{
	long ret;
	long i, j;

	/* Allocate a page for send and receive buffers. */
	hf_send_page = alloc_page(GFP_KERNEL);
	if (!hf_send_page) {
		pr_err("Unable to allocate send buffer\n");
		return -ENOMEM;
	}

	hf_recv_page = alloc_page(GFP_KERNEL);
	if (!hf_recv_page) {
		__free_page(hf_send_page);
		pr_err("Unable to allocate receive buffer\n");
		return -ENOMEM;
	}

	/*
	 * Configure both addresses. Once configured, we cannot free these
	 * pages because the hypervisor will use them, even if the module is
	 * unloaded.
	 */
	ret = hf_vm_configure(page_to_phys(hf_send_page),
			      page_to_phys(hf_recv_page));
	if (ret) {
		__free_page(hf_send_page);
		__free_page(hf_recv_page);
		/*
		 * TODO: We may want to grab this information from the
		 * hypervisor and go from there.
		 */
		pr_err("Unable to configure VM\n");
		return -EIO;
	}

	/* Get the number of VMs and allocate storage for them. */
	ret = hf_vm_get_count();
	if (ret < 0) {
		pr_err("Unable to retrieve number of VMs: %ld\n", ret);
		return ret;
	}

	hf_vm_count = ret;
	hf_vms = kmalloc(sizeof(struct hf_vm) * hf_vm_count, GFP_KERNEL);
	if (!hf_vms)
		return -ENOMEM;

	/* Initialize each VM. */
	for (i = 0; i < hf_vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];

		ret = hf_vcpu_get_count(i);
		if (ret < 0) {
			pr_err("HF_VCPU_GET_COUNT failed for vm=%ld: %ld\n",
			       i, ret);
			hf_free_resources(i);
			return ret;
		}

		vm->vcpu_count = ret;
		vm->vcpu = kmalloc(sizeof(struct hf_vcpu) * vm->vcpu_count,
				   GFP_KERNEL);
		if (!vm->vcpu) {
			pr_err("No memory for %ld vcpus for vm %ld\n",
			       vm->vcpu_count, i);
			hf_free_resources(i);
			return -ENOMEM;
		}

		/* Create a kernel thread for each vcpu. */
		for (j = 0; j < vm->vcpu_count; j++) {
			struct hf_vcpu *vcpu = &vm->vcpu[j];
			vcpu->task = kthread_create(hf_vcpu_thread, vcpu,
						    "vcpu_thread_%ld_%ld",
						    i, j);
			if (IS_ERR(vcpu->task)) {
				pr_err("Error creating task (vm=%ld,vcpu=%ld): %ld\n",
				       i, j, PTR_ERR(vcpu->task));
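				/*
				 * Only the first j vcpu threads of this VM
				 * exist; trim the count so cleanup stops
				 * there.
				 */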
				vm->vcpu_count = j;
				hf_free_resources(i + 1);
				return PTR_ERR(vcpu->task);
			}

			get_task_struct(vcpu->task);
			spin_lock_init(&vcpu->lock);
			vcpu->vm_index = i;
			vcpu->vcpu_index = j;
			vcpu->pending_irq = false;
		}
	}

	/* Start running threads now that all is initialized. */
	for (i = 0; i < hf_vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];
		for (j = 0; j < vm->vcpu_count; j++)
			wake_up_process(vm->vcpu[j].task);
	}

	/* Dump vm/vcpu count info. */
	pr_info("Hafnium successfully loaded with %ld VMs:\n", hf_vm_count);
	for (i = 0; i < hf_vm_count; i++)
		pr_info("\tVM %ld: %ld vCPUs\n", i, hf_vms[i].vcpu_count);

	/* Create the sysfs interface to interrupt vcpus. */
	hf_sysfs_obj = kobject_create_and_add("hafnium", kernel_kobj);
	if (!hf_sysfs_obj) {
		pr_err("Unable to create sysfs object\n");
	} else {
		ret = sysfs_create_file(hf_sysfs_obj, &interrupt_attr.attr);
		if (ret)
			pr_err("Unable to create 'interrupt' sysfs file\n");

		ret = sysfs_create_file(hf_sysfs_obj, &send_attr.attr);
		if (ret)
			pr_err("Unable to create 'send' sysfs file\n");
	}

	return 0;
}

/**
 * Frees up all resources used by the hafnium driver in preparation for
 * unloading it.
 */
static void __exit hf_exit(void)
{
	if (hf_sysfs_obj)
		kobject_put(hf_sysfs_obj);

	pr_info("Preparing to unload hafnium\n");
	hf_free_resources(hf_vm_count);
	pr_info("Hafnium ready to unload\n");
}

MODULE_LICENSE("GPL");

module_init(hf_init);
module_exit(hf_exit);