blob: b12c1b0d3e1d0be92e7c07d1d75207615a4b257c [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2018 Cambridge Greys Ltd
 * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
 */
7
/* 2001-09-28...2002-04-17
 * Partition stuff by James_McMechan@hotmail.com
 * old style ubd by setting UBD_SHIFT to 0
 * 2002-09-27...2002-10-18 massive tinkering for 2.5
 * partitions have changed in 2.5
 * 2003-01-29 more tinkering for 2.5.59-1
 * This should now address the sysfs problems and has
 * the symlink for devfs to allow for booting with
 * the common /dev/ubd/discX/... names rather than
 * only /dev/ubdN/discN this version also has lots of
 * clean ups preparing for ubd-many.
 * James McMechan
 */
21
/*
 * Minor-number bits per unit: each disk is allocated 1 << UBD_SHIFT minors
 * and its first minor is unit << UBD_SHIFT.
 */
#define UBD_SHIFT 4
23
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/blkdev.h>
David Brazdil0f672f62019-12-10 10:32:29 +000027#include <linux/blk-mq.h>
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000028#include <linux/ata.h>
29#include <linux/hdreg.h>
30#include <linux/cdrom.h>
31#include <linux/proc_fs.h>
32#include <linux/seq_file.h>
33#include <linux/ctype.h>
34#include <linux/slab.h>
35#include <linux/vmalloc.h>
36#include <linux/platform_device.h>
37#include <linux/scatterlist.h>
38#include <asm/tlbflush.h>
39#include <kern_util.h>
40#include "mconsole_kern.h"
41#include <init.h>
42#include <irq_kern.h>
43#include "ubd.h"
44#include <os.h>
45#include "cow.h"
46
/*
 * Largest request, in sectors: one sector per bit of an unsigned long
 * sector mask (the original note gives this as 32K).
 */
#define UBD_MAX_REQUEST (sizeof(long) * 8)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000049
/*
 * One I/O segment of a request handed to the I/O thread.
 * NOTE(review): the field semantics below are inferred from names only; the
 * code that fills and consumes them is outside this view - confirm.
 */
struct io_desc {
	char *buffer;			/* data buffer for this segment */
	unsigned long length;		/* presumably the segment length in bytes */
	unsigned long sector_mask;	/* presumably one bit per sector */
	unsigned long long cow_offset;	/* presumably an offset into the COW bitmap */
	unsigned long bitmap_words[2];	/* presumably bitmap words to write back */
};
57
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000058struct io_thread_req {
59 struct request *req;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000060 int fds[2];
61 unsigned long offsets[2];
62 unsigned long long offset;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000063 int sectorsize;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000064 int error;
Olivier Deprez0e641232021-09-23 10:07:05 +020065
66 int desc_cnt;
67 /* io_desc has to be the last element of the struct */
68 struct io_desc io_desc[];
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000069};
70
71
/*
 * Receive state for request pointers read by the interrupt handler:
 * a buffer of whole pointers plus storage for the bytes of a pointer
 * that was only partially read.
 */
static struct io_thread_req *(*irq_req_buffer)[];
static struct io_thread_req *irq_remainder;
static int irq_remainder_size;

/* The same triple, allocated by ubd_init() for the io_* side. */
static struct io_thread_req *(*io_req_buffer)[];
static struct io_thread_req *io_remainder;
static int io_remainder_size;
79
80
81
/*
 * Return 1 if bit number @bit is set in the byte-addressed bitmap @data,
 * 0 otherwise.  Bit 0 is the least significant bit of data[0].
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int bits_per_elem = sizeof(data[0]) * 8;
	__u64 index = bit / bits_per_elem;
	int shift = bit % bits_per_elem;

	return (data[index] & (1 << shift)) != 0;
}
92
/*
 * Set bit number @bit in the byte-addressed bitmap @data.
 * Bit 0 is the least significant bit of data[0].
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int bits_per_elem = sizeof(data[0]) * 8;
	__u64 index = bit / bits_per_elem;
	int shift = bit % bits_per_elem;

	data[index] |= (1 << shift);
}
/*End stuff from ubd_user.h*/
104
105#define DRIVER_NAME "uml-blkdev"
106
107static DEFINE_MUTEX(ubd_lock);
108static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
109
110static int ubd_open(struct block_device *bdev, fmode_t mode);
111static void ubd_release(struct gendisk *disk, fmode_t mode);
112static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
113 unsigned int cmd, unsigned long arg);
114static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
115
116#define MAX_DEV (16)
117
118static const struct block_device_operations ubd_blops = {
119 .owner = THIS_MODULE,
120 .open = ubd_open,
121 .release = ubd_release,
122 .ioctl = ubd_ioctl,
Olivier Deprez157378f2022-04-04 15:47:50 +0200123 .compat_ioctl = blkdev_compat_ptr_ioctl,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000124 .getgeo = ubd_getgeo,
125};
126
127/* Protected by ubd_lock */
128static int fake_major = UBD_MAJOR;
129static struct gendisk *ubd_gendisk[MAX_DEV];
130static struct gendisk *fake_gendisk[MAX_DEV];
131
132#ifdef CONFIG_BLK_DEV_UBD_SYNC
133#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
134 .cl = 1 })
135#else
136#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
137 .cl = 1 })
138#endif
139static struct openflags global_openflags = OPEN_FLAGS;
140
/* Copy-on-write state of a unit: the backing file and the COW bitmap. */
struct cow {
	char *file;			/* backing file name */
	int fd;				/* backing file fd */
	unsigned long *bitmap;		/* vmalloc'ed by ubd_open_dev() */
	unsigned long bitmap_len;	/* bitmap length in bytes */
	int bitmap_offset;		/* offset of the bitmap in the COW file */
	int data_offset;
};
151
152#define MAX_SG 64
153
154struct ubd {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000155 /* name (and fd, below) of the file opened for writing, either the
156 * backing or the cow file. */
157 char *file;
158 int count;
159 int fd;
160 __u64 size;
161 struct openflags boot_openflags;
162 struct openflags openflags;
163 unsigned shared:1;
164 unsigned no_cow:1;
David Brazdil0f672f62019-12-10 10:32:29 +0000165 unsigned no_trim:1;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000166 struct cow cow;
167 struct platform_device pdev;
168 struct request_queue *queue;
David Brazdil0f672f62019-12-10 10:32:29 +0000169 struct blk_mq_tag_set tag_set;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000170 spinlock_t lock;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000171};
172
/* Initializer for an unconfigured struct cow (no backing file, no bitmap). */
#define DEFAULT_COW {			\
	.file		= NULL,		\
	.fd		= -1,		\
	.bitmap		= NULL,		\
	.bitmap_offset	= 0,		\
	.data_offset	= 0,		\
}

/* Initializer for an unconfigured struct ubd; .file == NULL marks it free. */
#define DEFAULT_UBD {					\
	.file		= NULL,				\
	.count		= 0,				\
	.fd		= -1,				\
	.size		= -1,				\
	.boot_openflags	= OPEN_FLAGS,			\
	.openflags	= OPEN_FLAGS,			\
	.shared		= 0,				\
	.no_cow		= 0,				\
	.no_trim	= 0,				\
	.cow		= DEFAULT_COW,			\
	.lock		= __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
}
194
195/* Protected by ubd_lock */
196static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
197
198/* Only changed by fake_ide_setup which is a setup */
199static int fake_ide = 0;
200static struct proc_dir_entry *proc_ide_root = NULL;
201static struct proc_dir_entry *proc_ide = NULL;
202
David Brazdil0f672f62019-12-10 10:32:29 +0000203static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
204 const struct blk_mq_queue_data *bd);
205
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000206static void make_proc_ide(void)
207{
208 proc_ide_root = proc_mkdir("ide", NULL);
209 proc_ide = proc_mkdir("ide0", proc_ide_root);
210}
211
/* seq_file show callback for the fake "media" entry: always prints "disk". */
static int fake_ide_media_proc_show(struct seq_file *m, void *v)
{
	seq_puts(m, "disk\n");

	return 0;
}
217
218static void make_ide_entries(const char *dev_name)
219{
220 struct proc_dir_entry *dir, *ent;
221 char name[64];
222
223 if(proc_ide_root == NULL) make_proc_ide();
224
225 dir = proc_mkdir(dev_name, proc_ide);
226 if(!dir) return;
227
228 ent = proc_create_single("media", S_IRUGO, dir,
229 fake_ide_media_proc_show);
230 if(!ent) return;
231 snprintf(name, sizeof(name), "ide0/%s", dev_name);
232 proc_symlink(dev_name, proc_ide_root, name);
233}
234
235static int fake_ide_setup(char *str)
236{
237 fake_ide = 1;
238 return 1;
239}
240
241__setup("fake_ide", fake_ide_setup);
242
243__uml_help(fake_ide_setup,
244"fake_ide\n"
245" Create ide0 entries that map onto ubd devices.\n\n"
246);
247
/*
 * Parse a unit specifier at *ptr: either a number (any base accepted by
 * simple_strtoul() with base 0) or a single lowercase letter, where 'a' is
 * unit 0.  On success *ptr is advanced past the specifier.
 *
 * Returns the unit number, or -1 if nothing could be parsed.  Range checking
 * against MAX_DEV is left to the caller.  Numbers that do not fit in an int
 * are clamped to INT_MAX so that callers report them as out of range instead
 * of seeing a wrapped (possibly valid or negative) value.
 */
static int parse_unit(char **ptr)
{
	char *str = *ptr, *end;
	unsigned long val;

	if (isdigit(*str)) {
		val = simple_strtoul(str, &end, 0);
		if (end == str)
			return -1;
		*ptr = end;
		return (val > INT_MAX) ? INT_MAX : (int) val;
	}

	if (('a' <= *str) && (*str <= 'z')) {
		*ptr = str + 1;
		return *str - 'a';
	}

	return -1;
}
266
267/* If *index_out == -1 at exit, the passed option was a general one;
268 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
269 * should not be freed on exit.
270 */
271static int ubd_setup_common(char *str, int *index_out, char **error_out)
272{
273 struct ubd *ubd_dev;
274 struct openflags flags = global_openflags;
275 char *backing_file;
276 int n, err = 0, i;
277
278 if(index_out) *index_out = -1;
279 n = *str;
280 if(n == '='){
281 char *end;
282 int major;
283
284 str++;
285 if(!strcmp(str, "sync")){
286 global_openflags = of_sync(global_openflags);
David Brazdil0f672f62019-12-10 10:32:29 +0000287 return err;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000288 }
289
290 err = -EINVAL;
291 major = simple_strtoul(str, &end, 0);
292 if((*end != '\0') || (end == str)){
293 *error_out = "Didn't parse major number";
David Brazdil0f672f62019-12-10 10:32:29 +0000294 return err;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000295 }
296
297 mutex_lock(&ubd_lock);
298 if (fake_major != UBD_MAJOR) {
299 *error_out = "Can't assign a fake major twice";
300 goto out1;
301 }
302
303 fake_major = major;
304
305 printk(KERN_INFO "Setting extra ubd major number to %d\n",
306 major);
307 err = 0;
308 out1:
309 mutex_unlock(&ubd_lock);
310 return err;
311 }
312
313 n = parse_unit(&str);
314 if(n < 0){
315 *error_out = "Couldn't parse device number";
316 return -EINVAL;
317 }
318 if(n >= MAX_DEV){
319 *error_out = "Device number out of range";
320 return 1;
321 }
322
323 err = -EBUSY;
324 mutex_lock(&ubd_lock);
325
326 ubd_dev = &ubd_devs[n];
327 if(ubd_dev->file != NULL){
328 *error_out = "Device is already configured";
329 goto out;
330 }
331
332 if (index_out)
333 *index_out = n;
334
335 err = -EINVAL;
David Brazdil0f672f62019-12-10 10:32:29 +0000336 for (i = 0; i < sizeof("rscdt="); i++) {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000337 switch (*str) {
338 case 'r':
339 flags.w = 0;
340 break;
341 case 's':
342 flags.s = 1;
343 break;
344 case 'd':
345 ubd_dev->no_cow = 1;
346 break;
347 case 'c':
348 ubd_dev->shared = 1;
349 break;
David Brazdil0f672f62019-12-10 10:32:29 +0000350 case 't':
351 ubd_dev->no_trim = 1;
352 break;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000353 case '=':
354 str++;
355 goto break_loop;
356 default:
357 *error_out = "Expected '=' or flag letter "
David Brazdil0f672f62019-12-10 10:32:29 +0000358 "(r, s, c, t or d)";
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000359 goto out;
360 }
361 str++;
362 }
363
364 if (*str == '=')
365 *error_out = "Too many flags specified";
366 else
367 *error_out = "Missing '='";
368 goto out;
369
370break_loop:
371 backing_file = strchr(str, ',');
372
373 if (backing_file == NULL)
374 backing_file = strchr(str, ':');
375
376 if(backing_file != NULL){
377 if(ubd_dev->no_cow){
378 *error_out = "Can't specify both 'd' and a cow file";
379 goto out;
380 }
381 else {
382 *backing_file = '\0';
383 backing_file++;
384 }
385 }
386 err = 0;
387 ubd_dev->file = str;
388 ubd_dev->cow.file = backing_file;
389 ubd_dev->boot_openflags = flags;
390out:
391 mutex_unlock(&ubd_lock);
392 return err;
393}
394
395static int ubd_setup(char *str)
396{
397 char *error;
398 int err;
399
400 err = ubd_setup_common(str, NULL, &error);
401 if(err)
402 printk(KERN_ERR "Failed to initialize device with \"%s\" : "
403 "%s\n", str, error);
404 return 1;
405}
406
407__setup("ubd", ubd_setup);
408__uml_help(ubd_setup,
409"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
410" This is used to associate a device with a file in the underlying\n"
411" filesystem. When specifying two filenames, the first one is the\n"
412" COW name and the second is the backing file name. As separator you can\n"
413" use either a ':' or a ',': the first one allows writing things like;\n"
414" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
415" while with a ',' the shell would not expand the 2nd '~'.\n"
416" When using only one filename, UML will detect whether to treat it like\n"
417" a COW file or a backing file. To override this detection, add the 'd'\n"
418" flag:\n"
419" ubd0d=BackingFile\n"
420" Usually, there is a filesystem in the file, but \n"
421" that's not required. Swap devices containing swap files can be\n"
422" specified like this. Also, a file which doesn't contain a\n"
423" filesystem can have its contents read in the virtual \n"
424" machine by running 'dd' on the device. <n> must be in the range\n"
425" 0 to 7. Appending an 'r' to the number will cause that device\n"
426" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
427" an 's' will cause data to be written to disk on the host immediately.\n"
428" 'c' will cause the device to be treated as being shared between multiple\n"
429" UMLs and file locking will be turned off - this is appropriate for a\n"
430" cluster filesystem and inappropriate at almost all other times.\n\n"
David Brazdil0f672f62019-12-10 10:32:29 +0000431" 't' will disable trim/discard support on the device (enabled by default).\n\n"
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000432);
433
434static int udb_setup(char *str)
435{
436 printk("udb%s specified on command line is almost certainly a ubd -> "
437 "udb TYPO\n", str);
438 return 1;
439}
440
441__setup("udb", udb_setup);
442__uml_help(udb_setup,
443"udb\n"
444" This option is here solely to catch ubd -> udb typos, which can be\n"
445" to impossible to catch visually unless you specifically look for\n"
446" them. The only result of any option starting with 'udb' is an error\n"
447" in the boot output.\n\n"
448);
449
/*
 * Descriptor ubd_handler() reads completed request pointers from.
 * Only changed by ubd_init, which is an initcall.
 */
static int thread_fd = -1;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000452
/*
 * Read several request pointers at a time, handling fractional reads if
 * (and as) needed.
 *
 * @fd:             descriptor to read from
 * @request_buffer: receives the pointers; room for @max_recs of them
 * @remainder:      storage whose bytes hold the leading part of a pointer
 *                  that a previous call read only partially
 * @remainder_size: number of valid bytes in @remainder (in/out)
 * @max_recs:       capacity of @request_buffer in pointers
 *
 * Returns the number of bytes of whole pointers now in @request_buffer, or
 * the os_read_file() result unchanged when it is <= 0 (in which case any
 * saved remainder is kept for the next call).
 *
 * *remainder_size is updated after every successful read, including being
 * reset to 0 when the data ends on a pointer boundary; otherwise the bytes
 * of an already-consumed remainder would be prepended to every later read.
 */
static int bulk_req_safe_read(
	int fd,
	struct io_thread_req * (*request_buffer)[],
	struct io_thread_req **remainder,
	int *remainder_size,
	int max_recs
	)
{
	const int rec_size = sizeof(struct io_thread_req *);
	char *buf = (char *) request_buffer;
	int carried = *remainder_size;
	int total, partial, res;

	/* Put the bytes left over from the previous call in front. */
	if (carried > 0)
		memmove(buf, (char *) remainder, carried);
	else
		carried = 0;

	res = os_read_file(fd, buf + *remainder_size,
			   rec_size * max_recs - *remainder_size);
	if (res <= 0)
		return res;

	total = carried + res;
	partial = total % rec_size;
	*remainder_size = partial;

	if (partial > 0) {
		/*
		 * Read somehow returned not a multiple of dword
		 * theoretically possible, but never observed in the
		 * wild, so read routine must be able to handle it
		 */
		WARN(partial > 0, "UBD IPC read returned a partial result");
		memmove(remainder, buf + (total - partial), partial);
	}

	return total - partial;
}
505
506/* Called without dev->lock held, and only in interrupt context. */
507static void ubd_handler(void)
508{
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000509 int n;
510 int count;
511
512 while(1){
513 n = bulk_req_safe_read(
514 thread_fd,
515 irq_req_buffer,
516 &irq_remainder,
517 &irq_remainder_size,
518 UBD_REQ_BUFFER_SIZE
519 );
520 if (n < 0) {
521 if(n == -EAGAIN)
522 break;
523 printk(KERN_ERR "spurious interrupt in ubd_handler, "
524 "err = %d\n", -n);
525 return;
526 }
527 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
David Brazdil0f672f62019-12-10 10:32:29 +0000528 struct io_thread_req *io_req = (*irq_req_buffer)[count];
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000529
David Brazdil0f672f62019-12-10 10:32:29 +0000530 if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
531 blk_queue_max_discard_sectors(io_req->req->q, 0);
532 blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
533 blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q);
534 }
Olivier Deprez0e641232021-09-23 10:07:05 +0200535 blk_mq_end_request(io_req->req, io_req->error);
David Brazdil0f672f62019-12-10 10:32:29 +0000536 kfree(io_req);
537 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000538 }
539}
540
541static irqreturn_t ubd_intr(int irq, void *dev)
542{
543 ubd_handler();
544 return IRQ_HANDLED;
545}
546
/* Only changed by ubd_init, which is an initcall. */
static int io_pid = -1;

/* Exit hook: kill the I/O thread if one was recorded in io_pid. */
static void kill_io_thread(void)
{
	if (io_pid == -1)
		return;

	os_kill_process(io_pid, 1);
}

__uml_exitcall(kill_io_thread);
557
558static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
559{
560 char *file;
561 int fd;
562 int err;
563
564 __u32 version;
565 __u32 align;
566 char *backing_file;
Olivier Deprez157378f2022-04-04 15:47:50 +0200567 time64_t mtime;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000568 unsigned long long size;
569 int sector_size;
570 int bitmap_offset;
571
572 if (ubd_dev->file && ubd_dev->cow.file) {
573 file = ubd_dev->cow.file;
574
575 goto out;
576 }
577
578 fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
579 if (fd < 0)
580 return fd;
581
582 err = read_cow_header(file_reader, &fd, &version, &backing_file, \
583 &mtime, &size, &sector_size, &align, &bitmap_offset);
584 os_close_file(fd);
585
586 if(err == -EINVAL)
587 file = ubd_dev->file;
588 else
589 file = backing_file;
590
591out:
592 return os_file_size(file, size_out);
593}
594
/*
 * Read @len bytes at @offset of @fd into @buf.
 * Returns 0 on success, or the negative os_pread_file() result.
 */
static int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int rc = os_pread_file(fd, buf, len, offset);

	return (rc < 0) ? rc : 0;
}
605
Olivier Deprez157378f2022-04-04 15:47:50 +0200606static int backing_file_mismatch(char *file, __u64 size, time64_t mtime)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000607{
Olivier Deprez157378f2022-04-04 15:47:50 +0200608 time64_t modtime;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000609 unsigned long long actual;
610 int err;
611
612 err = os_file_modtime(file, &modtime);
613 if (err < 0) {
614 printk(KERN_ERR "Failed to get modification time of backing "
615 "file \"%s\", err = %d\n", file, -err);
616 return err;
617 }
618
619 err = os_file_size(file, &actual);
620 if (err < 0) {
621 printk(KERN_ERR "Failed to get size of backing file \"%s\", "
622 "err = %d\n", file, -err);
623 return err;
624 }
625
626 if (actual != size) {
627 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
628 * the typecast.*/
629 printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
630 "vs backing file\n", (unsigned long long) size, actual);
631 return -EINVAL;
632 }
633 if (modtime != mtime) {
Olivier Deprez157378f2022-04-04 15:47:50 +0200634 printk(KERN_ERR "mtime mismatch (%lld vs %lld) of COW header vs "
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000635 "backing file\n", mtime, modtime);
636 return -EINVAL;
637 }
638 return 0;
639}
640
641static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
642{
643 struct uml_stat buf1, buf2;
644 int err;
645
646 if (from_cmdline == NULL)
647 return 0;
648 if (!strcmp(from_cmdline, from_cow))
649 return 0;
650
651 err = os_stat_file(from_cmdline, &buf1);
652 if (err < 0) {
653 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
654 -err);
655 return 0;
656 }
657 err = os_stat_file(from_cow, &buf2);
658 if (err < 0) {
659 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
660 -err);
661 return 1;
662 }
663 if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
664 return 0;
665
666 printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
667 "\"%s\" specified in COW header of \"%s\"\n",
668 from_cmdline, from_cow, cow);
669 return 1;
670}
671
672static int open_ubd_file(char *file, struct openflags *openflags, int shared,
673 char **backing_file_out, int *bitmap_offset_out,
674 unsigned long *bitmap_len_out, int *data_offset_out,
675 int *create_cow_out)
676{
Olivier Deprez157378f2022-04-04 15:47:50 +0200677 time64_t mtime;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000678 unsigned long long size;
679 __u32 version, align;
680 char *backing_file;
681 int fd, err, sectorsize, asked_switch, mode = 0644;
682
683 fd = os_open_file(file, *openflags, mode);
684 if (fd < 0) {
685 if ((fd == -ENOENT) && (create_cow_out != NULL))
686 *create_cow_out = 1;
687 if (!openflags->w ||
688 ((fd != -EROFS) && (fd != -EACCES)))
689 return fd;
690 openflags->w = 0;
691 fd = os_open_file(file, *openflags, mode);
692 if (fd < 0)
693 return fd;
694 }
695
696 if (shared)
697 printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
698 else {
699 err = os_lock_file(fd, openflags->w);
700 if (err < 0) {
701 printk(KERN_ERR "Failed to lock '%s', err = %d\n",
702 file, -err);
703 goto out_close;
704 }
705 }
706
707 /* Successful return case! */
708 if (backing_file_out == NULL)
709 return fd;
710
711 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
712 &size, &sectorsize, &align, bitmap_offset_out);
713 if (err && (*backing_file_out != NULL)) {
714 printk(KERN_ERR "Failed to read COW header from COW file "
715 "\"%s\", errno = %d\n", file, -err);
716 goto out_close;
717 }
718 if (err)
719 return fd;
720
721 asked_switch = path_requires_switch(*backing_file_out, backing_file,
722 file);
723
724 /* Allow switching only if no mismatch. */
725 if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
726 mtime)) {
727 printk(KERN_ERR "Switching backing file to '%s'\n",
728 *backing_file_out);
729 err = write_cow_header(file, fd, *backing_file_out,
730 sectorsize, align, &size);
731 if (err) {
732 printk(KERN_ERR "Switch failed, errno = %d\n", -err);
733 goto out_close;
734 }
735 } else {
736 *backing_file_out = backing_file;
737 err = backing_file_mismatch(*backing_file_out, size, mtime);
738 if (err)
739 goto out_close;
740 }
741
742 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
743 bitmap_len_out, data_offset_out);
744
745 return fd;
746 out_close:
747 os_close_file(fd);
748 return err;
749}
750
751static int create_cow_file(char *cow_file, char *backing_file,
752 struct openflags flags,
753 int sectorsize, int alignment, int *bitmap_offset_out,
754 unsigned long *bitmap_len_out, int *data_offset_out)
755{
756 int err, fd;
757
758 flags.c = 1;
759 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
760 if (fd < 0) {
761 err = fd;
762 printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
763 cow_file, -err);
764 goto out;
765 }
766
767 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
768 bitmap_offset_out, bitmap_len_out,
769 data_offset_out);
770 if (!err)
771 return fd;
772 os_close_file(fd);
773 out:
774 return err;
775}
776
777static void ubd_close_dev(struct ubd *ubd_dev)
778{
779 os_close_file(ubd_dev->fd);
780 if(ubd_dev->cow.file == NULL)
781 return;
782
783 os_close_file(ubd_dev->cow.fd);
784 vfree(ubd_dev->cow.bitmap);
785 ubd_dev->cow.bitmap = NULL;
786}
787
788static int ubd_open_dev(struct ubd *ubd_dev)
789{
790 struct openflags flags;
791 char **back_ptr;
792 int err, create_cow, *create_ptr;
793 int fd;
794
795 ubd_dev->openflags = ubd_dev->boot_openflags;
796 create_cow = 0;
797 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
798 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
799
800 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
801 back_ptr, &ubd_dev->cow.bitmap_offset,
802 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
803 create_ptr);
804
805 if((fd == -ENOENT) && create_cow){
806 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
David Brazdil0f672f62019-12-10 10:32:29 +0000807 ubd_dev->openflags, SECTOR_SIZE, PAGE_SIZE,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000808 &ubd_dev->cow.bitmap_offset,
809 &ubd_dev->cow.bitmap_len,
810 &ubd_dev->cow.data_offset);
811 if(fd >= 0){
812 printk(KERN_INFO "Creating \"%s\" as COW file for "
813 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
814 }
815 }
816
817 if(fd < 0){
818 printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
819 -fd);
820 return fd;
821 }
822 ubd_dev->fd = fd;
823
824 if(ubd_dev->cow.file != NULL){
825 blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
826
827 err = -ENOMEM;
828 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
829 if(ubd_dev->cow.bitmap == NULL){
830 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
831 goto error;
832 }
833 flush_tlb_kernel_vm();
834
835 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
836 ubd_dev->cow.bitmap_offset,
837 ubd_dev->cow.bitmap_len);
838 if(err < 0)
839 goto error;
840
841 flags = ubd_dev->openflags;
842 flags.w = 0;
843 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
844 NULL, NULL, NULL, NULL);
845 if(err < 0) goto error;
846 ubd_dev->cow.fd = err;
847 }
David Brazdil0f672f62019-12-10 10:32:29 +0000848 if (ubd_dev->no_trim == 0) {
849 ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
850 ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
851 blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
852 blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
853 blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);
854 }
855 blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000856 return 0;
857 error:
858 os_close_file(ubd_dev->fd);
859 return err;
860}
861
862static void ubd_device_release(struct device *dev)
863{
864 struct ubd *ubd_dev = dev_get_drvdata(dev);
865
866 blk_cleanup_queue(ubd_dev->queue);
David Brazdil0f672f62019-12-10 10:32:29 +0000867 blk_mq_free_tag_set(&ubd_dev->tag_set);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000868 *ubd_dev = ((struct ubd) DEFAULT_UBD);
869}
870
871static int ubd_disk_register(int major, u64 size, int unit,
872 struct gendisk **disk_out)
873{
874 struct device *parent = NULL;
875 struct gendisk *disk;
876
877 disk = alloc_disk(1 << UBD_SHIFT);
878 if(disk == NULL)
879 return -ENOMEM;
880
881 disk->major = major;
882 disk->first_minor = unit << UBD_SHIFT;
883 disk->fops = &ubd_blops;
884 set_capacity(disk, size / 512);
885 if (major == UBD_MAJOR)
886 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
887 else
888 sprintf(disk->disk_name, "ubd_fake%d", unit);
889
890 /* sysfs register (not for ide fake devices) */
891 if (major == UBD_MAJOR) {
892 ubd_devs[unit].pdev.id = unit;
893 ubd_devs[unit].pdev.name = DRIVER_NAME;
894 ubd_devs[unit].pdev.dev.release = ubd_device_release;
895 dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
896 platform_device_register(&ubd_devs[unit].pdev);
897 parent = &ubd_devs[unit].pdev.dev;
898 }
899
900 disk->private_data = &ubd_devs[unit];
901 disk->queue = ubd_devs[unit].queue;
David Brazdil0f672f62019-12-10 10:32:29 +0000902 device_add_disk(parent, disk, NULL);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000903
904 *disk_out = disk;
905 return 0;
906}
907
/*
 * Round @n up to the next multiple of SECTOR_SIZE.  The argument is
 * parenthesized so that expressions such as ROUND_BLOCK(a | b) expand
 * correctly.
 */
#define ROUND_BLOCK(n) (((n) + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE))
909
910static const struct blk_mq_ops ubd_mq_ops = {
911 .queue_rq = ubd_queue_rq,
912};
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000913
914static int ubd_add(int n, char **error_out)
915{
916 struct ubd *ubd_dev = &ubd_devs[n];
917 int err = 0;
918
919 if(ubd_dev->file == NULL)
920 goto out;
921
922 err = ubd_file_size(ubd_dev, &ubd_dev->size);
923 if(err < 0){
924 *error_out = "Couldn't determine size of device's file";
925 goto out;
926 }
927
928 ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
929
David Brazdil0f672f62019-12-10 10:32:29 +0000930 ubd_dev->tag_set.ops = &ubd_mq_ops;
931 ubd_dev->tag_set.queue_depth = 64;
932 ubd_dev->tag_set.numa_node = NUMA_NO_NODE;
933 ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
934 ubd_dev->tag_set.driver_data = ubd_dev;
935 ubd_dev->tag_set.nr_hw_queues = 1;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000936
David Brazdil0f672f62019-12-10 10:32:29 +0000937 err = blk_mq_alloc_tag_set(&ubd_dev->tag_set);
938 if (err)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000939 goto out;
David Brazdil0f672f62019-12-10 10:32:29 +0000940
941 ubd_dev->queue = blk_mq_init_queue(&ubd_dev->tag_set);
942 if (IS_ERR(ubd_dev->queue)) {
943 err = PTR_ERR(ubd_dev->queue);
944 goto out_cleanup_tags;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000945 }
David Brazdil0f672f62019-12-10 10:32:29 +0000946
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000947 ubd_dev->queue->queuedata = ubd_dev;
948 blk_queue_write_cache(ubd_dev->queue, true, false);
949
950 blk_queue_max_segments(ubd_dev->queue, MAX_SG);
Olivier Deprez0e641232021-09-23 10:07:05 +0200951 blk_queue_segment_boundary(ubd_dev->queue, PAGE_SIZE - 1);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000952 err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
953 if(err){
954 *error_out = "Failed to register device";
David Brazdil0f672f62019-12-10 10:32:29 +0000955 goto out_cleanup_tags;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000956 }
957
958 if (fake_major != UBD_MAJOR)
959 ubd_disk_register(fake_major, ubd_dev->size, n,
960 &fake_gendisk[n]);
961
962 /*
963 * Perhaps this should also be under the "if (fake_major)" above
964 * using the fake_disk->disk_name
965 */
966 if (fake_ide)
967 make_ide_entries(ubd_gendisk[n]->disk_name);
968
969 err = 0;
970out:
971 return err;
972
David Brazdil0f672f62019-12-10 10:32:29 +0000973out_cleanup_tags:
974 blk_mq_free_tag_set(&ubd_dev->tag_set);
975 if (!(IS_ERR(ubd_dev->queue)))
976 blk_cleanup_queue(ubd_dev->queue);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000977 goto out;
978}
979
980static int ubd_config(char *str, char **error_out)
981{
982 int n, ret;
983
984 /* This string is possibly broken up and stored, so it's only
985 * freed if ubd_setup_common fails, or if only general options
986 * were set.
987 */
988 str = kstrdup(str, GFP_KERNEL);
989 if (str == NULL) {
990 *error_out = "Failed to allocate memory";
991 return -ENOMEM;
992 }
993
994 ret = ubd_setup_common(str, &n, error_out);
995 if (ret)
996 goto err_free;
997
998 if (n == -1) {
999 ret = 0;
1000 goto err_free;
1001 }
1002
1003 mutex_lock(&ubd_lock);
1004 ret = ubd_add(n, error_out);
1005 if (ret)
1006 ubd_devs[n].file = NULL;
1007 mutex_unlock(&ubd_lock);
1008
1009out:
1010 return ret;
1011
1012err_free:
1013 kfree(str);
1014 goto out;
1015}
1016
1017static int ubd_get_config(char *name, char *str, int size, char **error_out)
1018{
1019 struct ubd *ubd_dev;
1020 int n, len = 0;
1021
1022 n = parse_unit(&name);
1023 if((n >= MAX_DEV) || (n < 0)){
1024 *error_out = "ubd_get_config : device number out of range";
1025 return -1;
1026 }
1027
1028 ubd_dev = &ubd_devs[n];
1029 mutex_lock(&ubd_lock);
1030
1031 if(ubd_dev->file == NULL){
1032 CONFIG_CHUNK(str, size, len, "", 1);
1033 goto out;
1034 }
1035
1036 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
1037
1038 if(ubd_dev->cow.file != NULL){
1039 CONFIG_CHUNK(str, size, len, ",", 0);
1040 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
1041 }
1042 else CONFIG_CHUNK(str, size, len, "", 1);
1043
1044 out:
1045 mutex_unlock(&ubd_lock);
1046 return len;
1047}
1048
1049static int ubd_id(char **str, int *start_out, int *end_out)
1050{
1051 int n;
1052
1053 n = parse_unit(str);
1054 *start_out = 0;
1055 *end_out = MAX_DEV - 1;
1056 return n;
1057}
1058
1059static int ubd_remove(int n, char **error_out)
1060{
1061 struct gendisk *disk = ubd_gendisk[n];
1062 struct ubd *ubd_dev;
1063 int err = -ENODEV;
1064
1065 mutex_lock(&ubd_lock);
1066
1067 ubd_dev = &ubd_devs[n];
1068
1069 if(ubd_dev->file == NULL)
1070 goto out;
1071
1072 /* you cannot remove a open disk */
1073 err = -EBUSY;
1074 if(ubd_dev->count > 0)
1075 goto out;
1076
1077 ubd_gendisk[n] = NULL;
1078 if(disk != NULL){
1079 del_gendisk(disk);
1080 put_disk(disk);
1081 }
1082
1083 if(fake_gendisk[n] != NULL){
1084 del_gendisk(fake_gendisk[n]);
1085 put_disk(fake_gendisk[n]);
1086 fake_gendisk[n] = NULL;
1087 }
1088
1089 err = 0;
1090 platform_device_unregister(&ubd_dev->pdev);
1091out:
1092 mutex_unlock(&ubd_lock);
1093 return err;
1094}
1095
1096/* All these are called by mconsole in process context and without
1097 * ubd-specific locks. The structure itself is const except for .list.
1098 */
1099static struct mc_device ubd_mc = {
1100 .list = LIST_HEAD_INIT(ubd_mc.list),
1101 .name = "ubd",
1102 .config = ubd_config,
1103 .get_config = ubd_get_config,
1104 .id = ubd_id,
1105 .remove = ubd_remove,
1106};
1107
1108static int __init ubd_mc_init(void)
1109{
1110 mconsole_register_dev(&ubd_mc);
1111 return 0;
1112}
1113
1114__initcall(ubd_mc_init);
1115
1116static int __init ubd0_init(void)
1117{
1118 struct ubd *ubd_dev = &ubd_devs[0];
1119
1120 mutex_lock(&ubd_lock);
1121 if(ubd_dev->file == NULL)
1122 ubd_dev->file = "root_fs";
1123 mutex_unlock(&ubd_lock);
1124
1125 return 0;
1126}
1127
1128__initcall(ubd0_init);
1129
1130/* Used in ubd_init, which is an initcall */
1131static struct platform_driver ubd_driver = {
1132 .driver = {
1133 .name = DRIVER_NAME,
1134 },
1135};
1136
1137static int __init ubd_init(void)
1138{
1139 char *error;
1140 int i, err;
1141
1142 if (register_blkdev(UBD_MAJOR, "ubd"))
1143 return -1;
1144
1145 if (fake_major != UBD_MAJOR) {
1146 char name[sizeof("ubd_nnn\0")];
1147
1148 snprintf(name, sizeof(name), "ubd_%d", fake_major);
1149 if (register_blkdev(fake_major, "ubd"))
1150 return -1;
1151 }
1152
1153 irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1154 sizeof(struct io_thread_req *),
1155 GFP_KERNEL
1156 );
1157 irq_remainder = 0;
1158
1159 if (irq_req_buffer == NULL) {
1160 printk(KERN_ERR "Failed to initialize ubd buffering\n");
1161 return -1;
1162 }
1163 io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1164 sizeof(struct io_thread_req *),
1165 GFP_KERNEL
1166 );
1167
1168 io_remainder = 0;
1169
1170 if (io_req_buffer == NULL) {
1171 printk(KERN_ERR "Failed to initialize ubd buffering\n");
1172 return -1;
1173 }
1174 platform_driver_register(&ubd_driver);
1175 mutex_lock(&ubd_lock);
1176 for (i = 0; i < MAX_DEV; i++){
1177 err = ubd_add(i, &error);
1178 if(err)
1179 printk(KERN_ERR "Failed to initialize ubd device %d :"
1180 "%s\n", i, error);
1181 }
1182 mutex_unlock(&ubd_lock);
1183 return 0;
1184}
1185
1186late_initcall(ubd_init);
1187
1188static int __init ubd_driver_init(void){
1189 unsigned long stack;
1190 int err;
1191
1192 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1193 if(global_openflags.s){
1194 printk(KERN_INFO "ubd: Synchronous mode\n");
1195 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1196 * enough. So use anyway the io thread. */
1197 }
1198 stack = alloc_stack(0, 0);
1199 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
1200 &thread_fd);
1201 if(io_pid < 0){
1202 printk(KERN_ERR
1203 "ubd : Failed to start I/O thread (errno = %d) - "
1204 "falling back to synchronous I/O\n", -io_pid);
1205 io_pid = -1;
1206 return 0;
1207 }
1208 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
1209 0, "ubd", ubd_devs);
1210 if(err != 0)
1211 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
1212 return 0;
1213}
1214
1215device_initcall(ubd_driver_init);
1216
1217static int ubd_open(struct block_device *bdev, fmode_t mode)
1218{
1219 struct gendisk *disk = bdev->bd_disk;
1220 struct ubd *ubd_dev = disk->private_data;
1221 int err = 0;
1222
1223 mutex_lock(&ubd_mutex);
1224 if(ubd_dev->count == 0){
1225 err = ubd_open_dev(ubd_dev);
1226 if(err){
1227 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
1228 disk->disk_name, ubd_dev->file, -err);
1229 goto out;
1230 }
1231 }
1232 ubd_dev->count++;
1233 set_disk_ro(disk, !ubd_dev->openflags.w);
1234
1235 /* This should no more be needed. And it didn't work anyway to exclude
1236 * read-write remounting of filesystems.*/
1237 /*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
1238 if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1239 err = -EROFS;
1240 }*/
1241out:
1242 mutex_unlock(&ubd_mutex);
1243 return err;
1244}
1245
1246static void ubd_release(struct gendisk *disk, fmode_t mode)
1247{
1248 struct ubd *ubd_dev = disk->private_data;
1249
1250 mutex_lock(&ubd_mutex);
1251 if(--ubd_dev->count == 0)
1252 ubd_close_dev(ubd_dev);
1253 mutex_unlock(&ubd_mutex);
1254}
1255
1256static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1257 __u64 *cow_offset, unsigned long *bitmap,
1258 __u64 bitmap_offset, unsigned long *bitmap_words,
1259 __u64 bitmap_len)
1260{
David Brazdil0f672f62019-12-10 10:32:29 +00001261 __u64 sector = io_offset >> SECTOR_SHIFT;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001262 int i, update_bitmap = 0;
1263
David Brazdil0f672f62019-12-10 10:32:29 +00001264 for (i = 0; i < length >> SECTOR_SHIFT; i++) {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001265 if(cow_mask != NULL)
1266 ubd_set_bit(i, (unsigned char *) cow_mask);
1267 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1268 continue;
1269
1270 update_bitmap = 1;
1271 ubd_set_bit(sector + i, (unsigned char *) bitmap);
1272 }
1273
1274 if(!update_bitmap)
1275 return;
1276
1277 *cow_offset = sector / (sizeof(unsigned long) * 8);
1278
1279 /* This takes care of the case where we're exactly at the end of the
1280 * device, and *cow_offset + 1 is off the end. So, just back it up
1281 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
1282 * for the original diagnosis.
1283 */
1284 if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1285 sizeof(unsigned long)) - 1))
1286 (*cow_offset)--;
1287
1288 bitmap_words[0] = bitmap[*cow_offset];
1289 bitmap_words[1] = bitmap[*cow_offset + 1];
1290
1291 *cow_offset *= sizeof(unsigned long);
1292 *cow_offset += bitmap_offset;
1293}
1294
Olivier Deprez0e641232021-09-23 10:07:05 +02001295static void cowify_req(struct io_thread_req *req, struct io_desc *segment,
1296 unsigned long offset, unsigned long *bitmap,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001297 __u64 bitmap_offset, __u64 bitmap_len)
1298{
Olivier Deprez0e641232021-09-23 10:07:05 +02001299 __u64 sector = offset >> SECTOR_SHIFT;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001300 int i;
1301
Olivier Deprez0e641232021-09-23 10:07:05 +02001302 if (segment->length > (sizeof(segment->sector_mask) * 8) << SECTOR_SHIFT)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001303 panic("Operation too long");
1304
David Brazdil0f672f62019-12-10 10:32:29 +00001305 if (req_op(req->req) == REQ_OP_READ) {
Olivier Deprez0e641232021-09-23 10:07:05 +02001306 for (i = 0; i < segment->length >> SECTOR_SHIFT; i++) {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001307 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1308 ubd_set_bit(i, (unsigned char *)
Olivier Deprez0e641232021-09-23 10:07:05 +02001309 &segment->sector_mask);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001310 }
Olivier Deprez0e641232021-09-23 10:07:05 +02001311 } else {
1312 cowify_bitmap(offset, segment->length, &segment->sector_mask,
1313 &segment->cow_offset, bitmap, bitmap_offset,
1314 segment->bitmap_words, bitmap_len);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001315 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001316}
1317
Olivier Deprez0e641232021-09-23 10:07:05 +02001318static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req,
1319 struct request *req)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001320{
Olivier Deprez0e641232021-09-23 10:07:05 +02001321 struct bio_vec bvec;
1322 struct req_iterator iter;
1323 int i = 0;
1324 unsigned long byte_offset = io_req->offset;
1325 int op = req_op(req);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001326
Olivier Deprez0e641232021-09-23 10:07:05 +02001327 if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) {
1328 io_req->io_desc[0].buffer = NULL;
1329 io_req->io_desc[0].length = blk_rq_bytes(req);
1330 } else {
1331 rq_for_each_segment(bvec, req, iter) {
1332 BUG_ON(i >= io_req->desc_cnt);
1333
1334 io_req->io_desc[i].buffer =
1335 page_address(bvec.bv_page) + bvec.bv_offset;
1336 io_req->io_desc[i].length = bvec.bv_len;
1337 i++;
1338 }
1339 }
1340
1341 if (dev->cow.file) {
1342 for (i = 0; i < io_req->desc_cnt; i++) {
1343 cowify_req(io_req, &io_req->io_desc[i], byte_offset,
1344 dev->cow.bitmap, dev->cow.bitmap_offset,
1345 dev->cow.bitmap_len);
1346 byte_offset += io_req->io_desc[i].length;
1347 }
1348
1349 }
1350}
1351
1352static struct io_thread_req *ubd_alloc_req(struct ubd *dev, struct request *req,
1353 int desc_cnt)
1354{
1355 struct io_thread_req *io_req;
1356 int i;
1357
1358 io_req = kmalloc(sizeof(*io_req) +
1359 (desc_cnt * sizeof(struct io_desc)),
1360 GFP_ATOMIC);
David Brazdil0f672f62019-12-10 10:32:29 +00001361 if (!io_req)
Olivier Deprez0e641232021-09-23 10:07:05 +02001362 return NULL;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001363
David Brazdil0f672f62019-12-10 10:32:29 +00001364 io_req->req = req;
1365 if (dev->cow.file)
1366 io_req->fds[0] = dev->cow.fd;
1367 else
1368 io_req->fds[0] = dev->fd;
1369 io_req->error = 0;
David Brazdil0f672f62019-12-10 10:32:29 +00001370 io_req->sectorsize = SECTOR_SIZE;
1371 io_req->fds[1] = dev->fd;
Olivier Deprez0e641232021-09-23 10:07:05 +02001372 io_req->offset = (u64) blk_rq_pos(req) << SECTOR_SHIFT;
David Brazdil0f672f62019-12-10 10:32:29 +00001373 io_req->offsets[0] = 0;
1374 io_req->offsets[1] = dev->cow.data_offset;
1375
Olivier Deprez0e641232021-09-23 10:07:05 +02001376 for (i = 0 ; i < desc_cnt; i++) {
1377 io_req->io_desc[i].sector_mask = 0;
1378 io_req->io_desc[i].cow_offset = -1;
1379 }
1380
1381 return io_req;
1382}
1383
1384static int ubd_submit_request(struct ubd *dev, struct request *req)
1385{
1386 int segs = 0;
1387 struct io_thread_req *io_req;
1388 int ret;
1389 int op = req_op(req);
1390
1391 if (op == REQ_OP_FLUSH)
1392 segs = 0;
1393 else if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD)
1394 segs = 1;
1395 else
1396 segs = blk_rq_nr_phys_segments(req);
1397
1398 io_req = ubd_alloc_req(dev, req, segs);
1399 if (!io_req)
1400 return -ENOMEM;
1401
1402 io_req->desc_cnt = segs;
1403 if (segs)
1404 ubd_map_req(dev, io_req, req);
David Brazdil0f672f62019-12-10 10:32:29 +00001405
1406 ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
1407 if (ret != sizeof(io_req)) {
1408 if (ret != -EAGAIN)
1409 pr_err("write to io thread failed: %d\n", -ret);
1410 kfree(io_req);
1411 }
1412 return ret;
1413}
1414
David Brazdil0f672f62019-12-10 10:32:29 +00001415static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
1416 const struct blk_mq_queue_data *bd)
1417{
1418 struct ubd *ubd_dev = hctx->queue->queuedata;
1419 struct request *req = bd->rq;
1420 int ret = 0, res = BLK_STS_OK;
1421
1422 blk_mq_start_request(req);
1423
1424 spin_lock_irq(&ubd_dev->lock);
1425
1426 switch (req_op(req)) {
David Brazdil0f672f62019-12-10 10:32:29 +00001427 case REQ_OP_FLUSH:
David Brazdil0f672f62019-12-10 10:32:29 +00001428 case REQ_OP_READ:
1429 case REQ_OP_WRITE:
David Brazdil0f672f62019-12-10 10:32:29 +00001430 case REQ_OP_DISCARD:
1431 case REQ_OP_WRITE_ZEROES:
Olivier Deprez0e641232021-09-23 10:07:05 +02001432 ret = ubd_submit_request(ubd_dev, req);
David Brazdil0f672f62019-12-10 10:32:29 +00001433 break;
1434 default:
1435 WARN_ON_ONCE(1);
1436 res = BLK_STS_NOTSUPP;
1437 }
1438
1439 spin_unlock_irq(&ubd_dev->lock);
1440
1441 if (ret < 0) {
1442 if (ret == -ENOMEM)
1443 res = BLK_STS_RESOURCE;
1444 else
1445 res = BLK_STS_DEV_RESOURCE;
1446 }
1447
1448 return res;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001449}
1450
1451static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1452{
1453 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1454
1455 geo->heads = 128;
1456 geo->sectors = 32;
1457 geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1458 return 0;
1459}
1460
1461static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1462 unsigned int cmd, unsigned long arg)
1463{
1464 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1465 u16 ubd_id[ATA_ID_WORDS];
1466
1467 switch (cmd) {
1468 struct cdrom_volctrl volume;
1469 case HDIO_GET_IDENTITY:
1470 memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1471 ubd_id[ATA_ID_CYLS] = ubd_dev->size / (128 * 32 * 512);
1472 ubd_id[ATA_ID_HEADS] = 128;
1473 ubd_id[ATA_ID_SECTORS] = 32;
1474 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1475 sizeof(ubd_id)))
1476 return -EFAULT;
1477 return 0;
1478
1479 case CDROMVOLREAD:
1480 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1481 return -EFAULT;
1482 volume.channel0 = 255;
1483 volume.channel1 = 255;
1484 volume.channel2 = 255;
1485 volume.channel3 = 255;
1486 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1487 return -EFAULT;
1488 return 0;
1489 }
1490 return -EINVAL;
1491}
1492
David Brazdil0f672f62019-12-10 10:32:29 +00001493static int map_error(int error_code)
1494{
1495 switch (error_code) {
1496 case 0:
1497 return BLK_STS_OK;
1498 case ENOSYS:
1499 case EOPNOTSUPP:
1500 return BLK_STS_NOTSUPP;
1501 case ENOSPC:
1502 return BLK_STS_NOSPC;
1503 }
1504 return BLK_STS_IOERR;
1505}
1506
1507/*
1508 * Everything from here onwards *IS NOT PART OF THE KERNEL*
1509 *
1510 * The following functions are part of UML hypervisor code.
1511 * All functions from here onwards are executed as a helper
1512 * thread and are not allowed to execute any kernel functions.
1513 *
1514 * Any communication must occur strictly via shared memory and IPC.
1515 *
1516 * Do not add printks, locks, kernel memory operations, etc - it
1517 * will result in unpredictable behaviour and/or crashes.
1518 */
1519
Olivier Deprez0e641232021-09-23 10:07:05 +02001520static int update_bitmap(struct io_thread_req *req, struct io_desc *segment)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001521{
1522 int n;
1523
Olivier Deprez0e641232021-09-23 10:07:05 +02001524 if (segment->cow_offset == -1)
David Brazdil0f672f62019-12-10 10:32:29 +00001525 return map_error(0);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001526
Olivier Deprez0e641232021-09-23 10:07:05 +02001527 n = os_pwrite_file(req->fds[1], &segment->bitmap_words,
1528 sizeof(segment->bitmap_words), segment->cow_offset);
1529 if (n != sizeof(segment->bitmap_words))
David Brazdil0f672f62019-12-10 10:32:29 +00001530 return map_error(-n);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001531
David Brazdil0f672f62019-12-10 10:32:29 +00001532 return map_error(0);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001533}
1534
Olivier Deprez0e641232021-09-23 10:07:05 +02001535static void do_io(struct io_thread_req *req, struct io_desc *desc)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001536{
David Brazdil0f672f62019-12-10 10:32:29 +00001537 char *buf = NULL;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001538 unsigned long len;
1539 int n, nsectors, start, end, bit;
1540 __u64 off;
1541
David Brazdil0f672f62019-12-10 10:32:29 +00001542 /* FLUSH is really a special case, we cannot "case" it with others */
1543
1544 if (req_op(req->req) == REQ_OP_FLUSH) {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001545 /* fds[0] is always either the rw image or our cow file */
David Brazdil0f672f62019-12-10 10:32:29 +00001546 req->error = map_error(-os_sync_file(req->fds[0]));
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001547 return;
1548 }
1549
Olivier Deprez0e641232021-09-23 10:07:05 +02001550 nsectors = desc->length / req->sectorsize;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001551 start = 0;
1552 do {
Olivier Deprez0e641232021-09-23 10:07:05 +02001553 bit = ubd_test_bit(start, (unsigned char *) &desc->sector_mask);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001554 end = start;
1555 while((end < nsectors) &&
Olivier Deprez0e641232021-09-23 10:07:05 +02001556 (ubd_test_bit(end, (unsigned char *) &desc->sector_mask) == bit))
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001557 end++;
1558
1559 off = req->offset + req->offsets[bit] +
1560 start * req->sectorsize;
1561 len = (end - start) * req->sectorsize;
Olivier Deprez0e641232021-09-23 10:07:05 +02001562 if (desc->buffer != NULL)
1563 buf = &desc->buffer[start * req->sectorsize];
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001564
David Brazdil0f672f62019-12-10 10:32:29 +00001565 switch (req_op(req->req)) {
1566 case REQ_OP_READ:
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001567 n = 0;
1568 do {
1569 buf = &buf[n];
1570 len -= n;
1571 n = os_pread_file(req->fds[bit], buf, len, off);
1572 if (n < 0) {
David Brazdil0f672f62019-12-10 10:32:29 +00001573 req->error = map_error(-n);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001574 return;
1575 }
1576 } while((n < len) && (n != 0));
1577 if (n < len) memset(&buf[n], 0, len - n);
David Brazdil0f672f62019-12-10 10:32:29 +00001578 break;
1579 case REQ_OP_WRITE:
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001580 n = os_pwrite_file(req->fds[bit], buf, len, off);
1581 if(n != len){
David Brazdil0f672f62019-12-10 10:32:29 +00001582 req->error = map_error(-n);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001583 return;
1584 }
David Brazdil0f672f62019-12-10 10:32:29 +00001585 break;
1586 case REQ_OP_DISCARD:
1587 case REQ_OP_WRITE_ZEROES:
1588 n = os_falloc_punch(req->fds[bit], off, len);
1589 if (n) {
1590 req->error = map_error(-n);
1591 return;
1592 }
1593 break;
1594 default:
1595 WARN_ON_ONCE(1);
1596 req->error = BLK_STS_NOTSUPP;
1597 return;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001598 }
1599
1600 start = end;
1601 } while(start < nsectors);
1602
Olivier Deprez0e641232021-09-23 10:07:05 +02001603 req->offset += len;
1604 req->error = update_bitmap(req, desc);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001605}
1606
/*
 * Changed in start_io_thread, which is serialized by being called only
 * from ubd_driver_init, which is an initcall.
 */
int kernel_fd = -1;

/* Only changed by the io thread. XXX: currently unused. */
static int io_count;
1614
1615int io_thread(void *arg)
1616{
1617 int n, count, written, res;
1618
1619 os_fix_helper_signals();
1620
1621 while(1){
1622 n = bulk_req_safe_read(
1623 kernel_fd,
1624 io_req_buffer,
1625 &io_remainder,
1626 &io_remainder_size,
1627 UBD_REQ_BUFFER_SIZE
1628 );
Olivier Deprez157378f2022-04-04 15:47:50 +02001629 if (n <= 0) {
1630 if (n == -EAGAIN)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001631 ubd_read_poll(-1);
Olivier Deprez157378f2022-04-04 15:47:50 +02001632
1633 continue;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001634 }
1635
1636 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
Olivier Deprez0e641232021-09-23 10:07:05 +02001637 struct io_thread_req *req = (*io_req_buffer)[count];
1638 int i;
1639
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001640 io_count++;
Olivier Deprez0e641232021-09-23 10:07:05 +02001641 for (i = 0; !req->error && i < req->desc_cnt; i++)
1642 do_io(req, &(req->io_desc[i]));
1643
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001644 }
1645
1646 written = 0;
1647
1648 do {
Olivier Deprez0e641232021-09-23 10:07:05 +02001649 res = os_write_file(kernel_fd,
1650 ((char *) io_req_buffer) + written,
1651 n - written);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001652 if (res >= 0) {
1653 written += res;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001654 }
1655 if (written < n) {
1656 ubd_write_poll(-1);
1657 }
1658 } while (written < n);
1659 }
1660
1661 return 0;
1662}