#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched/task.h>
#include <linux/slab.h>

struct hf_vcpu {
	spinlock_t lock;
	long vm_index;
	long vcpu_index;
	struct task_struct *task;
	struct hrtimer timer;
	bool pending_irq;
};

struct hf_vm {
	long vcpu_count;
	struct hf_vcpu *vcpu;
};

long hf_hvc(size_t arg0, size_t arg1, size_t arg2, size_t arg3);

static struct hf_vm *hf_vms;
static long hf_vm_count;
static struct page *hf_send_page = NULL;
static struct page *hf_recv_page = NULL;

/* TODO: Define constants below according to spec. Include shared header. */
#define HF_VCPU_RUN		0xff00
#define HF_VM_GET_COUNT		0xff01
#define HF_VCPU_GET_COUNT	0xff02
#define HF_VM_CONFIGURE		0xff03
#define HF_RPC_REQUEST		0xff04
#define HF_RPC_READ_REQUEST	0xff05
#define HF_RPC_ACK		0xff06
#define HF_RPC_REPLY		0xff07

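/*
 * Illustrative sketch only: hf_hvc() is implemented outside this file. On
 * aarch64 it might look like the (hypothetical) version below, assuming the
 * usual HVC calling convention with arguments in x0-x3 and the result in x0;
 * the #if 0 guard keeps this out of the build.
 */
#if 0
static long hf_hvc_sketch(size_t arg0, size_t arg1, size_t arg2, size_t arg3)
{
	register size_t r0 __asm__("x0") = arg0;
	register size_t r1 __asm__("x1") = arg1;
	register size_t r2 __asm__("x2") = arg2;
	register size_t r3 __asm__("x3") = arg3;

	__asm__ volatile("hvc #0"
			 : "+r"(r0), "+r"(r1), "+r"(r2), "+r"(r3)
			 :
			 : "memory");

	return r0;
}
#endif
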
/**
 * Wakes up the thread associated with the vcpu that owns the given timer. This
 * is called when the timer the thread is waiting on expires.
 */
static enum hrtimer_restart hf_vcpu_timer_expired(struct hrtimer *timer)
{
	struct hf_vcpu *vcpu = container_of(timer, struct hf_vcpu, timer);

	wake_up_process(vcpu->task);
	return HRTIMER_NORESTART;
}

/**
 * This is the main loop of each vcpu.
 */
static int hf_vcpu_thread(void *data)
{
	struct hf_vcpu *vcpu = data;
	long ret;

	hrtimer_init(&vcpu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->timer.function = &hf_vcpu_timer_expired;

	while (!kthread_should_stop()) {
		unsigned long flags;
		size_t irqs;

		set_current_state(TASK_RUNNING);

		/* Determine if we must interrupt the vcpu. */
		spin_lock_irqsave(&vcpu->lock, flags);
		irqs = vcpu->pending_irq ? 1 : 0;
		vcpu->pending_irq = false;
		spin_unlock_irqrestore(&vcpu->lock, flags);

		/* Call into hafnium to run vcpu. */
		ret = hf_hvc(HF_VCPU_RUN, vcpu->vm_index, vcpu->vcpu_index,
			     irqs);

		/*
		 * A negative return value indicates that this vcpu needs to
		 * sleep for the given number of nanoseconds.
		 */
		if (ret < 0) {
			set_current_state(TASK_INTERRUPTIBLE);
			if (kthread_should_stop())
				break;
			hrtimer_start(&vcpu->timer, ns_to_ktime(-ret),
				      HRTIMER_MODE_REL);
			schedule();
			hrtimer_cancel(&vcpu->timer);
			continue;
		}

		/* TODO: Use constants below. */
		switch ((u8)ret) {
		case 0x00: /* Yield (forcibly or voluntarily). */
			break;

		case 0x01: /* WFI. */
			set_current_state(TASK_INTERRUPTIBLE);
			if (kthread_should_stop())
				break;
			schedule();
			break;

		case 0x02: /* Wake up another vcpu. */
		{
			struct hf_vm *vm = hf_vms + vcpu->vm_index;
			long target = ret >> 8;

			if (target < vm->vcpu_count)
				wake_up_process(vm->vcpu[target].task);
			break;
		}

		case 0x03: /* Response available. */
		{
			size_t i, count = ret >> 8;
			const char *buf = page_address(hf_recv_page);

			pr_info("Received response (%zu bytes): ", count);
			for (i = 0; i < count; i++)
				printk(KERN_CONT "%c", buf[i]);
			printk(KERN_CONT "\n");
			hf_hvc(HF_RPC_ACK, 0, 0, 0);
			break;
		}
		}
	}

	set_current_state(TASK_RUNNING);

	return 0;
}
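
/*
 * Note on the encoding assumed by the switch in hf_vcpu_thread(): for
 * non-negative HF_VCPU_RUN results, the low byte selects the wake-up reason
 * and the remaining bits carry its payload (the target vcpu index for 0x02,
 * the response byte count for 0x03). Hypothetical helpers, for clarity only:
 *
 *	#define HF_VCPU_RUN_CODE(ret)	((u8)(ret))
 *	#define HF_VCPU_RUN_DATA(ret)	((ret) >> 8)
 */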

/**
 * Frees all resources, including threads, associated with the hafnium driver.
 */
static void hf_free_resources(long vm_count)
{
	long i, j;

	/*
	 * First stop all worker threads. We need to do this before freeing
	 * resources because workers may reference each other, so it is only
	 * safe to free resources after they have all stopped.
	 */
	for (i = 0; i < vm_count; i++) {
		struct hf_vm *vm = hf_vms + i;

		for (j = 0; j < vm->vcpu_count; j++)
			kthread_stop(vm->vcpu[j].task);
	}

	/* Free resources. */
	for (i = 0; i < vm_count; i++) {
		struct hf_vm *vm = hf_vms + i;

		for (j = 0; j < vm->vcpu_count; j++)
			put_task_struct(vm->vcpu[j].task);
		kfree(vm->vcpu);
	}

	kfree(hf_vms);
}

static ssize_t hf_interrupt_store(struct kobject *kobj,
				  struct kobj_attribute *attr, const char *buf,
				  size_t count)
{
	struct hf_vcpu *vcpu;
	unsigned long flags;
	struct task_struct *task;

	/* TODO: Parse input to determine which vcpu to interrupt. */
	/* TODO: Check bounds. */

	vcpu = hf_vms[0].vcpu + 0;

	spin_lock_irqsave(&vcpu->lock, flags);
	vcpu->pending_irq = true;
	/* TODO: Do we need to increment the task's ref count here? */
	task = vcpu->task;
	spin_unlock_irqrestore(&vcpu->lock, flags);

	/*
	 * Wake up the task. If it's already running, kick it out of the
	 * hypervisor.
	 * TODO: There's a race here: the kick may happen right before we go
	 * to the hypervisor.
	 */
	if (wake_up_process(task) == 0)
		kick_process(task);

	return count;
}
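
/*
 * One possible shape for the parsing TODOs above (an assumption, not the
 * driver's current behaviour): accept "vm_index vcpu_index" from the sysfs
 * buffer and bounds-check both indices, e.g.:
 *
 *	long vm_index, vcpu_index;
 *
 *	if (sscanf(buf, "%ld %ld", &vm_index, &vcpu_index) != 2)
 *		return -EINVAL;
 *	if (vm_index < 0 || vm_index >= hf_vm_count || vcpu_index < 0 ||
 *	    vcpu_index >= hf_vms[vm_index].vcpu_count)
 *		return -EINVAL;
 *	vcpu = hf_vms[vm_index].vcpu + vcpu_index;
 */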

static ssize_t hf_send_store(struct kobject *kobj, struct kobj_attribute *attr,
			     const char *buf, size_t count)
{
	long ret;
	struct hf_vm *vm;

	/* TODO: Use constant. */
	if (count > 4096)
		count = 4096;

	/* Copy data to send buffer. */
	memcpy(page_address(hf_send_page), buf, count);
	ret = hf_hvc(HF_RPC_REQUEST, 0, count, 0);
	if (ret < 0)
		return -EAGAIN;

	vm = hf_vms + 0;
	if (ret > vm->vcpu_count)
		return -EINVAL;

	if (ret == 0) {
		/*
		 * TODO: We need to interrupt some vcpu because none is
		 * actually waiting for data.
		 */
	} else {
		/*
		 * Wake up the vcpu that is going to process the data.
		 * TODO: There's a race where the thread may get woken up
		 * before it goes to sleep. Fix this.
		 */
		wake_up_process(vm->vcpu[ret - 1].task);
	}

	return count;
}
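
/*
 * Example usage of the sysfs files defined below, assuming the module is
 * loaded and sysfs is mounted at /sys (input to 'interrupt' is currently
 * ignored and always targets vcpu 0 of VM 0):
 *
 *	echo -n hello > /sys/kernel/hafnium/send	# send a 5-byte request
 *	echo 1 > /sys/kernel/hafnium/interrupt		# interrupt a vcpu
 */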

static struct kobject *hf_sysfs_obj = NULL;
static struct kobj_attribute interrupt_attr =
	__ATTR(interrupt, 0200, NULL, hf_interrupt_store);
static struct kobj_attribute send_attr =
	__ATTR(send, 0200, NULL, hf_send_store);

/**
 * Initializes the hafnium driver by creating a thread for each vCPU of each
 * virtual machine.
 */
static int __init hf_init(void)
{
	long ret;
	long i, j;

	/* Allocate a page for send and receive buffers. */
	hf_send_page = alloc_page(GFP_KERNEL);
	if (!hf_send_page) {
		pr_err("Unable to allocate send buffer\n");
		return -ENOMEM;
	}

	hf_recv_page = alloc_page(GFP_KERNEL);
	if (!hf_recv_page) {
		__free_page(hf_send_page);
		pr_err("Unable to allocate receive buffer\n");
		return -ENOMEM;
	}

	/*
	 * Configure both addresses. Once configured, we cannot free these
	 * pages because the hypervisor will use them, even if the module is
	 * unloaded.
	 */
	ret = hf_hvc(HF_VM_CONFIGURE, (size_t)page_to_phys(hf_send_page),
		     (size_t)page_to_phys(hf_recv_page), 0);
	if (ret) {
		__free_page(hf_send_page);
		__free_page(hf_recv_page);
		/*
		 * TODO: We may want to grab this information from the
		 * hypervisor and go from there.
		 */
		pr_err("Unable to configure VM\n");
		return -EIO;
	}

	/* Get the number of VMs and allocate storage for them. */
	ret = hf_hvc(HF_VM_GET_COUNT, 0, 0, 0);
	if (ret < 0) {
		pr_err("Unable to retrieve number of VMs: %ld\n", ret);
		return ret;
	}

	hf_vm_count = ret;
	hf_vms = kmalloc_array(hf_vm_count, sizeof(struct hf_vm), GFP_KERNEL);
	if (!hf_vms)
		return -ENOMEM;

	/* Initialize each VM. */
	for (i = 0; i < hf_vm_count; i++) {
		struct hf_vm *vm = hf_vms + i;

		ret = hf_hvc(HF_VCPU_GET_COUNT, i, 0, 0);
		if (ret < 0) {
			pr_err("HF_VCPU_GET_COUNT failed for vm=%ld: %ld\n",
			       i, ret);
			hf_free_resources(i);
			return ret;
		}

		vm->vcpu_count = ret;
		vm->vcpu = kmalloc_array(vm->vcpu_count,
					 sizeof(struct hf_vcpu), GFP_KERNEL);
		if (!vm->vcpu) {
			pr_err("No memory for %ld vcpus for vm %ld\n",
			       vm->vcpu_count, i);
			hf_free_resources(i);
			return -ENOMEM;
		}

		/* Create a kernel thread for each vcpu. */
		for (j = 0; j < vm->vcpu_count; j++) {
			struct hf_vcpu *vcpu = vm->vcpu + j;

			vcpu->task = kthread_create(hf_vcpu_thread, vcpu,
						    "vcpu_thread_%ld_%ld",
						    i, j);
			if (IS_ERR(vcpu->task)) {
				pr_err("Error creating task (vm=%ld,vcpu=%ld): %ld\n",
				       i, j, PTR_ERR(vcpu->task));
				vm->vcpu_count = j;
				hf_free_resources(i + 1);
				return PTR_ERR(vcpu->task);
			}

			get_task_struct(vcpu->task);
			spin_lock_init(&vcpu->lock);
			vcpu->vm_index = i;
			vcpu->vcpu_index = j;
			vcpu->pending_irq = false;
		}
	}

	/* Start running threads now that all is initialized. */
	for (i = 0; i < hf_vm_count; i++) {
		struct hf_vm *vm = hf_vms + i;

		for (j = 0; j < vm->vcpu_count; j++)
			wake_up_process(vm->vcpu[j].task);
	}

	/* Dump vm/vcpu count info. */
	pr_info("Hafnium successfully loaded with %ld VMs:\n", hf_vm_count);
	for (i = 0; i < hf_vm_count; i++)
		pr_info("\tVM %ld: %ld vCPUs\n", i, hf_vms[i].vcpu_count);

	/* Create the sysfs interface to interrupt vcpus. */
	hf_sysfs_obj = kobject_create_and_add("hafnium", kernel_kobj);
	if (!hf_sysfs_obj) {
		pr_err("Unable to create sysfs object\n");
	} else {
		ret = sysfs_create_file(hf_sysfs_obj, &interrupt_attr.attr);
		if (ret)
			pr_err("Unable to create 'interrupt' sysfs file\n");

		ret = sysfs_create_file(hf_sysfs_obj, &send_attr.attr);
		if (ret)
			pr_err("Unable to create 'send' sysfs file\n");
	}

	return 0;
}

/**
 * Frees up all resources used by the hafnium driver in preparation for
 * unloading it.
 */
static void __exit hf_exit(void)
{
	if (hf_sysfs_obj)
		kobject_put(hf_sysfs_obj);

	pr_info("Preparing to unload hafnium\n");
	hf_free_resources(hf_vm_count);
	pr_info("Hafnium ready to unload\n");
}

MODULE_LICENSE("GPL");

module_init(hf_init);
module_exit(hf_exit);