blob: 4e59ab817d3e7539134fc93a75f8b2f8826ea8fd [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2018 Cambridge Greys Ltd
 * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
 */

/* 2001-09-28...2002-04-17
 * Partition stuff by James_McMechan@hotmail.com
 * old style ubd by setting UBD_SHIFT to 0
 * 2002-09-27...2002-10-18 massive tinkering for 2.5
 * partitions have changed in 2.5
 * 2003-01-29 more tinkering for 2.5.59-1
 * This should now address the sysfs problems and has
 * the symlink for devfs to allow for booting with
 * the common /dev/ubd/discX/... names rather than
 * only /dev/ubdN/discN this version also has lots of
 * clean ups preparing for ubd-many.
 * James McMechan
 */
21
22#define UBD_SHIFT 4
23
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/blkdev.h>
David Brazdil0f672f62019-12-10 10:32:29 +000027#include <linux/blk-mq.h>
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000028#include <linux/ata.h>
29#include <linux/hdreg.h>
30#include <linux/cdrom.h>
31#include <linux/proc_fs.h>
32#include <linux/seq_file.h>
33#include <linux/ctype.h>
34#include <linux/slab.h>
35#include <linux/vmalloc.h>
36#include <linux/platform_device.h>
37#include <linux/scatterlist.h>
38#include <asm/tlbflush.h>
39#include <kern_util.h>
40#include "mconsole_kern.h"
41#include <init.h>
42#include <irq_kern.h>
43#include "ubd.h"
44#include <os.h>
45#include "cow.h"
46
David Brazdil0f672f62019-12-10 10:32:29 +000047/* Max request size is determined by sector mask - 32K */
48#define UBD_MAX_REQUEST (8 * sizeof(long))
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000049
/*
 * Descriptor for one piece of an I/O request; an io_thread_req carries a
 * trailing array of these. Field order and types are part of the layout
 * and must not be rearranged.
 */
struct io_desc {
	/* data buffer and its length */
	char *buffer;
	unsigned long length;
	/* NOTE(review): presumably one bit per sector of this piece - confirm */
	unsigned long sector_mask;
	/* NOTE(review): presumably where bitmap_words go in the COW file - confirm */
	unsigned long long cow_offset;
	unsigned long bitmap_words[2];
};
57
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000058struct io_thread_req {
59 struct request *req;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000060 int fds[2];
61 unsigned long offsets[2];
62 unsigned long long offset;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000063 int sectorsize;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000064 int error;
Olivier Deprez0e641232021-09-23 10:07:05 +020065
66 int desc_cnt;
67 /* io_desc has to be the last element of the struct */
68 struct io_desc io_desc[];
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000069};
70
71
72static struct io_thread_req * (*irq_req_buffer)[];
73static struct io_thread_req *irq_remainder;
74static int irq_remainder_size;
75
76static struct io_thread_req * (*io_req_buffer)[];
77static struct io_thread_req *io_remainder;
78static int io_remainder_size;
79
80
81
/*
 * Test bit number @bit in the byte array @data. Bits are counted from the
 * least significant bit of data[0] upwards.
 *
 * Returns 1 if the bit is set, 0 otherwise.
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int elem_bits = sizeof(data[0]) * 8;
	__u64 index = bit / elem_bits;
	int shift = bit % elem_bits;

	return (data[index] & (1 << shift)) != 0;
}
92
/*
 * Set bit number @bit in the byte array @data. Bits are counted from the
 * least significant bit of data[0] upwards; other bits are left untouched.
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int elem_bits = sizeof(data[0]) * 8;
	__u64 index = bit / elem_bits;
	int shift = bit % elem_bits;

	data[index] |= (1 << shift);
}
103/*End stuff from ubd_user.h*/
104
105#define DRIVER_NAME "uml-blkdev"
106
107static DEFINE_MUTEX(ubd_lock);
108static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
109
110static int ubd_open(struct block_device *bdev, fmode_t mode);
111static void ubd_release(struct gendisk *disk, fmode_t mode);
112static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
113 unsigned int cmd, unsigned long arg);
114static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
115
116#define MAX_DEV (16)
117
118static const struct block_device_operations ubd_blops = {
119 .owner = THIS_MODULE,
120 .open = ubd_open,
121 .release = ubd_release,
122 .ioctl = ubd_ioctl,
123 .getgeo = ubd_getgeo,
124};
125
126/* Protected by ubd_lock */
127static int fake_major = UBD_MAJOR;
128static struct gendisk *ubd_gendisk[MAX_DEV];
129static struct gendisk *fake_gendisk[MAX_DEV];
130
131#ifdef CONFIG_BLK_DEV_UBD_SYNC
132#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
133 .cl = 1 })
134#else
135#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
136 .cl = 1 })
137#endif
138static struct openflags global_openflags = OPEN_FLAGS;
139
/* Copy-on-write state of a device that sits on top of a backing file. */
struct cow {
	/* backing file name */
	char *file;
	/* backing file fd */
	int fd;
	/* vmalloc'ed bitmap of bitmap_len bytes, loaded in ubd_open_dev() */
	unsigned long *bitmap;
	unsigned long bitmap_len;
	/* offset the bitmap is read from in ubd_open_dev() */
	int bitmap_offset;
	/* filled in together with bitmap_len when the COW file is opened */
	int data_offset;
};
150
151#define MAX_SG 64
152
153struct ubd {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000154 /* name (and fd, below) of the file opened for writing, either the
155 * backing or the cow file. */
156 char *file;
157 int count;
158 int fd;
159 __u64 size;
160 struct openflags boot_openflags;
161 struct openflags openflags;
162 unsigned shared:1;
163 unsigned no_cow:1;
David Brazdil0f672f62019-12-10 10:32:29 +0000164 unsigned no_trim:1;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000165 struct cow cow;
166 struct platform_device pdev;
167 struct request_queue *queue;
David Brazdil0f672f62019-12-10 10:32:29 +0000168 struct blk_mq_tag_set tag_set;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000169 spinlock_t lock;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000170};
171
172#define DEFAULT_COW { \
173 .file = NULL, \
174 .fd = -1, \
175 .bitmap = NULL, \
176 .bitmap_offset = 0, \
177 .data_offset = 0, \
178}
179
180#define DEFAULT_UBD { \
181 .file = NULL, \
182 .count = 0, \
183 .fd = -1, \
184 .size = -1, \
185 .boot_openflags = OPEN_FLAGS, \
186 .openflags = OPEN_FLAGS, \
187 .no_cow = 0, \
David Brazdil0f672f62019-12-10 10:32:29 +0000188 .no_trim = 0, \
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000189 .shared = 0, \
190 .cow = DEFAULT_COW, \
191 .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000192}
193
194/* Protected by ubd_lock */
195static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
196
197/* Only changed by fake_ide_setup which is a setup */
198static int fake_ide = 0;
199static struct proc_dir_entry *proc_ide_root = NULL;
200static struct proc_dir_entry *proc_ide = NULL;
201
David Brazdil0f672f62019-12-10 10:32:29 +0000202static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
203 const struct blk_mq_queue_data *bd);
204
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000205static void make_proc_ide(void)
206{
207 proc_ide_root = proc_mkdir("ide", NULL);
208 proc_ide = proc_mkdir("ide0", proc_ide_root);
209}
210
/* seq_file show callback for the fake "media" entry: always reports "disk". */
static int fake_ide_media_proc_show(struct seq_file *m, void *v)
{
	seq_puts(m, "disk\n");

	return 0;
}
216
217static void make_ide_entries(const char *dev_name)
218{
219 struct proc_dir_entry *dir, *ent;
220 char name[64];
221
222 if(proc_ide_root == NULL) make_proc_ide();
223
224 dir = proc_mkdir(dev_name, proc_ide);
225 if(!dir) return;
226
227 ent = proc_create_single("media", S_IRUGO, dir,
228 fake_ide_media_proc_show);
229 if(!ent) return;
230 snprintf(name, sizeof(name), "ide0/%s", dev_name);
231 proc_symlink(dev_name, proc_ide_root, name);
232}
233
234static int fake_ide_setup(char *str)
235{
236 fake_ide = 1;
237 return 1;
238}
239
240__setup("fake_ide", fake_ide_setup);
241
242__uml_help(fake_ide_setup,
243"fake_ide\n"
244" Create ide0 entries that map onto ubd devices.\n\n"
245);
246
/*
 * Parse a unit designator at *ptr: either a number (as accepted by
 * simple_strtoul() with base 0) or a single lower-case letter, where
 * 'a' is unit 0.
 *
 * On success *ptr is advanced past the designator and the unit number is
 * returned; otherwise -1 is returned and *ptr is left alone.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr;
	char *end;

	if (isdigit(*cur)) {
		int unit = simple_strtoul(cur, &end, 0);

		if (end == cur)
			return -1;
		*ptr = end;
		return unit;
	}

	if ((*cur >= 'a') && (*cur <= 'z')) {
		*ptr = cur + 1;
		return *cur - 'a';
	}

	return -1;
}
265
266/* If *index_out == -1 at exit, the passed option was a general one;
267 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
268 * should not be freed on exit.
269 */
270static int ubd_setup_common(char *str, int *index_out, char **error_out)
271{
272 struct ubd *ubd_dev;
273 struct openflags flags = global_openflags;
274 char *backing_file;
275 int n, err = 0, i;
276
277 if(index_out) *index_out = -1;
278 n = *str;
279 if(n == '='){
280 char *end;
281 int major;
282
283 str++;
284 if(!strcmp(str, "sync")){
285 global_openflags = of_sync(global_openflags);
David Brazdil0f672f62019-12-10 10:32:29 +0000286 return err;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000287 }
288
289 err = -EINVAL;
290 major = simple_strtoul(str, &end, 0);
291 if((*end != '\0') || (end == str)){
292 *error_out = "Didn't parse major number";
David Brazdil0f672f62019-12-10 10:32:29 +0000293 return err;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000294 }
295
296 mutex_lock(&ubd_lock);
297 if (fake_major != UBD_MAJOR) {
298 *error_out = "Can't assign a fake major twice";
299 goto out1;
300 }
301
302 fake_major = major;
303
304 printk(KERN_INFO "Setting extra ubd major number to %d\n",
305 major);
306 err = 0;
307 out1:
308 mutex_unlock(&ubd_lock);
309 return err;
310 }
311
312 n = parse_unit(&str);
313 if(n < 0){
314 *error_out = "Couldn't parse device number";
315 return -EINVAL;
316 }
317 if(n >= MAX_DEV){
318 *error_out = "Device number out of range";
319 return 1;
320 }
321
322 err = -EBUSY;
323 mutex_lock(&ubd_lock);
324
325 ubd_dev = &ubd_devs[n];
326 if(ubd_dev->file != NULL){
327 *error_out = "Device is already configured";
328 goto out;
329 }
330
331 if (index_out)
332 *index_out = n;
333
334 err = -EINVAL;
David Brazdil0f672f62019-12-10 10:32:29 +0000335 for (i = 0; i < sizeof("rscdt="); i++) {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000336 switch (*str) {
337 case 'r':
338 flags.w = 0;
339 break;
340 case 's':
341 flags.s = 1;
342 break;
343 case 'd':
344 ubd_dev->no_cow = 1;
345 break;
346 case 'c':
347 ubd_dev->shared = 1;
348 break;
David Brazdil0f672f62019-12-10 10:32:29 +0000349 case 't':
350 ubd_dev->no_trim = 1;
351 break;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000352 case '=':
353 str++;
354 goto break_loop;
355 default:
356 *error_out = "Expected '=' or flag letter "
David Brazdil0f672f62019-12-10 10:32:29 +0000357 "(r, s, c, t or d)";
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000358 goto out;
359 }
360 str++;
361 }
362
363 if (*str == '=')
364 *error_out = "Too many flags specified";
365 else
366 *error_out = "Missing '='";
367 goto out;
368
369break_loop:
370 backing_file = strchr(str, ',');
371
372 if (backing_file == NULL)
373 backing_file = strchr(str, ':');
374
375 if(backing_file != NULL){
376 if(ubd_dev->no_cow){
377 *error_out = "Can't specify both 'd' and a cow file";
378 goto out;
379 }
380 else {
381 *backing_file = '\0';
382 backing_file++;
383 }
384 }
385 err = 0;
386 ubd_dev->file = str;
387 ubd_dev->cow.file = backing_file;
388 ubd_dev->boot_openflags = flags;
389out:
390 mutex_unlock(&ubd_lock);
391 return err;
392}
393
394static int ubd_setup(char *str)
395{
396 char *error;
397 int err;
398
399 err = ubd_setup_common(str, NULL, &error);
400 if(err)
401 printk(KERN_ERR "Failed to initialize device with \"%s\" : "
402 "%s\n", str, error);
403 return 1;
404}
405
406__setup("ubd", ubd_setup);
407__uml_help(ubd_setup,
408"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
409" This is used to associate a device with a file in the underlying\n"
410" filesystem. When specifying two filenames, the first one is the\n"
411" COW name and the second is the backing file name. As separator you can\n"
412" use either a ':' or a ',': the first one allows writing things like;\n"
413" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
414" while with a ',' the shell would not expand the 2nd '~'.\n"
415" When using only one filename, UML will detect whether to treat it like\n"
416" a COW file or a backing file. To override this detection, add the 'd'\n"
417" flag:\n"
418" ubd0d=BackingFile\n"
419" Usually, there is a filesystem in the file, but \n"
420" that's not required. Swap devices containing swap files can be\n"
421" specified like this. Also, a file which doesn't contain a\n"
422" filesystem can have its contents read in the virtual \n"
423" machine by running 'dd' on the device. <n> must be in the range\n"
424" 0 to 7. Appending an 'r' to the number will cause that device\n"
425" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
426" an 's' will cause data to be written to disk on the host immediately.\n"
427" 'c' will cause the device to be treated as being shared between multiple\n"
428" UMLs and file locking will be turned off - this is appropriate for a\n"
429" cluster filesystem and inappropriate at almost all other times.\n\n"
David Brazdil0f672f62019-12-10 10:32:29 +0000430" 't' will disable trim/discard support on the device (enabled by default).\n\n"
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000431);
432
433static int udb_setup(char *str)
434{
435 printk("udb%s specified on command line is almost certainly a ubd -> "
436 "udb TYPO\n", str);
437 return 1;
438}
439
440__setup("udb", udb_setup);
441__uml_help(udb_setup,
442"udb\n"
443" This option is here solely to catch ubd -> udb typos, which can be\n"
444" to impossible to catch visually unless you specifically look for\n"
445" them. The only result of any option starting with 'udb' is an error\n"
446" in the boot output.\n\n"
447);
448
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000449/* Only changed by ubd_init, which is an initcall. */
450static int thread_fd = -1;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000451
/*
 * Read several request pointers at a time from @fd, handling fractional
 * reads if (and as) needed.
 *
 * @request_buffer: destination array with room for @max_recs pointers
 * @remainder:      storage for the bytes of a partially received pointer,
 *                  carried from one call to the next
 * @remainder_size: number of valid bytes currently held in @remainder
 * @max_recs:       capacity of @request_buffer in pointers
 *
 * Returns the number of bytes of complete pointers now available at the
 * start of @request_buffer (possibly 0), or the non-positive result of
 * os_read_file() if nothing could be read.
 */
static int bulk_req_safe_read(
	int fd,
	struct io_thread_req * (*request_buffer)[],
	struct io_thread_req **remainder,
	int *remainder_size,
	int max_recs
	)
{
	const int rec_size = sizeof(struct io_thread_req *);
	int carried = *remainder_size;
	int n = 0;
	int res;

	/* Put the tail of the previous read back in front of the new data. */
	if (carried > 0) {
		memmove((char *) request_buffer, (char *) remainder, carried);
		n = carried;
	}

	res = os_read_file(fd, ((char *) request_buffer) + carried,
			   rec_size * max_recs - carried);
	if (res <= 0)
		return res;	/* the carried bytes stay in @remainder */

	n += res;

	/*
	 * The carried bytes have been consumed. Record the new remainder
	 * length unconditionally: leaving the old value behind when this read
	 * happened to complete the record would re-inject stale bytes on the
	 * next call and misalign the whole pointer stream.
	 */
	*remainder_size = n % rec_size;
	if (*remainder_size > 0) {
		/*
		 * Read somehow returned not a multiple of dword
		 * theoretically possible, but never observed in the
		 * wild, so read routine must be able to handle it
		 */
		WARN(*remainder_size > 0, "UBD IPC read returned a partial result");
		memmove(remainder,
			((char *) request_buffer) + (n - *remainder_size),
			*remainder_size);
		n -= *remainder_size;
	}

	return n;
}
504
505/* Called without dev->lock held, and only in interrupt context. */
506static void ubd_handler(void)
507{
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000508 int n;
509 int count;
510
511 while(1){
512 n = bulk_req_safe_read(
513 thread_fd,
514 irq_req_buffer,
515 &irq_remainder,
516 &irq_remainder_size,
517 UBD_REQ_BUFFER_SIZE
518 );
519 if (n < 0) {
520 if(n == -EAGAIN)
521 break;
522 printk(KERN_ERR "spurious interrupt in ubd_handler, "
523 "err = %d\n", -n);
524 return;
525 }
526 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
David Brazdil0f672f62019-12-10 10:32:29 +0000527 struct io_thread_req *io_req = (*irq_req_buffer)[count];
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000528
David Brazdil0f672f62019-12-10 10:32:29 +0000529 if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
530 blk_queue_max_discard_sectors(io_req->req->q, 0);
531 blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
532 blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q);
533 }
Olivier Deprez0e641232021-09-23 10:07:05 +0200534 blk_mq_end_request(io_req->req, io_req->error);
David Brazdil0f672f62019-12-10 10:32:29 +0000535 kfree(io_req);
536 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000537 }
538}
539
540static irqreturn_t ubd_intr(int irq, void *dev)
541{
542 ubd_handler();
543 return IRQ_HANDLED;
544}
545
546/* Only changed by ubd_init, which is an initcall. */
547static int io_pid = -1;
548
549static void kill_io_thread(void)
550{
551 if(io_pid != -1)
552 os_kill_process(io_pid, 1);
553}
554
555__uml_exitcall(kill_io_thread);
556
557static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
558{
559 char *file;
560 int fd;
561 int err;
562
563 __u32 version;
564 __u32 align;
565 char *backing_file;
566 time_t mtime;
567 unsigned long long size;
568 int sector_size;
569 int bitmap_offset;
570
571 if (ubd_dev->file && ubd_dev->cow.file) {
572 file = ubd_dev->cow.file;
573
574 goto out;
575 }
576
577 fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
578 if (fd < 0)
579 return fd;
580
581 err = read_cow_header(file_reader, &fd, &version, &backing_file, \
582 &mtime, &size, &sector_size, &align, &bitmap_offset);
583 os_close_file(fd);
584
585 if(err == -EINVAL)
586 file = ubd_dev->file;
587 else
588 file = backing_file;
589
590out:
591 return os_file_size(file, size_out);
592}
593
/*
 * Read @len bytes at @offset of @fd into @buf.
 * Returns 0 on success or the negative value reported by os_pread_file().
 */
static int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret = os_pread_file(fd, buf, len, offset);

	return (ret < 0) ? ret : 0;
}
604
605static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
606{
607 unsigned long modtime;
608 unsigned long long actual;
609 int err;
610
611 err = os_file_modtime(file, &modtime);
612 if (err < 0) {
613 printk(KERN_ERR "Failed to get modification time of backing "
614 "file \"%s\", err = %d\n", file, -err);
615 return err;
616 }
617
618 err = os_file_size(file, &actual);
619 if (err < 0) {
620 printk(KERN_ERR "Failed to get size of backing file \"%s\", "
621 "err = %d\n", file, -err);
622 return err;
623 }
624
625 if (actual != size) {
626 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
627 * the typecast.*/
628 printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
629 "vs backing file\n", (unsigned long long) size, actual);
630 return -EINVAL;
631 }
632 if (modtime != mtime) {
633 printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs "
634 "backing file\n", mtime, modtime);
635 return -EINVAL;
636 }
637 return 0;
638}
639
640static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
641{
642 struct uml_stat buf1, buf2;
643 int err;
644
645 if (from_cmdline == NULL)
646 return 0;
647 if (!strcmp(from_cmdline, from_cow))
648 return 0;
649
650 err = os_stat_file(from_cmdline, &buf1);
651 if (err < 0) {
652 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
653 -err);
654 return 0;
655 }
656 err = os_stat_file(from_cow, &buf2);
657 if (err < 0) {
658 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
659 -err);
660 return 1;
661 }
662 if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
663 return 0;
664
665 printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
666 "\"%s\" specified in COW header of \"%s\"\n",
667 from_cmdline, from_cow, cow);
668 return 1;
669}
670
671static int open_ubd_file(char *file, struct openflags *openflags, int shared,
672 char **backing_file_out, int *bitmap_offset_out,
673 unsigned long *bitmap_len_out, int *data_offset_out,
674 int *create_cow_out)
675{
676 time_t mtime;
677 unsigned long long size;
678 __u32 version, align;
679 char *backing_file;
680 int fd, err, sectorsize, asked_switch, mode = 0644;
681
682 fd = os_open_file(file, *openflags, mode);
683 if (fd < 0) {
684 if ((fd == -ENOENT) && (create_cow_out != NULL))
685 *create_cow_out = 1;
686 if (!openflags->w ||
687 ((fd != -EROFS) && (fd != -EACCES)))
688 return fd;
689 openflags->w = 0;
690 fd = os_open_file(file, *openflags, mode);
691 if (fd < 0)
692 return fd;
693 }
694
695 if (shared)
696 printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
697 else {
698 err = os_lock_file(fd, openflags->w);
699 if (err < 0) {
700 printk(KERN_ERR "Failed to lock '%s', err = %d\n",
701 file, -err);
702 goto out_close;
703 }
704 }
705
706 /* Successful return case! */
707 if (backing_file_out == NULL)
708 return fd;
709
710 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
711 &size, &sectorsize, &align, bitmap_offset_out);
712 if (err && (*backing_file_out != NULL)) {
713 printk(KERN_ERR "Failed to read COW header from COW file "
714 "\"%s\", errno = %d\n", file, -err);
715 goto out_close;
716 }
717 if (err)
718 return fd;
719
720 asked_switch = path_requires_switch(*backing_file_out, backing_file,
721 file);
722
723 /* Allow switching only if no mismatch. */
724 if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
725 mtime)) {
726 printk(KERN_ERR "Switching backing file to '%s'\n",
727 *backing_file_out);
728 err = write_cow_header(file, fd, *backing_file_out,
729 sectorsize, align, &size);
730 if (err) {
731 printk(KERN_ERR "Switch failed, errno = %d\n", -err);
732 goto out_close;
733 }
734 } else {
735 *backing_file_out = backing_file;
736 err = backing_file_mismatch(*backing_file_out, size, mtime);
737 if (err)
738 goto out_close;
739 }
740
741 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
742 bitmap_len_out, data_offset_out);
743
744 return fd;
745 out_close:
746 os_close_file(fd);
747 return err;
748}
749
750static int create_cow_file(char *cow_file, char *backing_file,
751 struct openflags flags,
752 int sectorsize, int alignment, int *bitmap_offset_out,
753 unsigned long *bitmap_len_out, int *data_offset_out)
754{
755 int err, fd;
756
757 flags.c = 1;
758 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
759 if (fd < 0) {
760 err = fd;
761 printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
762 cow_file, -err);
763 goto out;
764 }
765
766 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
767 bitmap_offset_out, bitmap_len_out,
768 data_offset_out);
769 if (!err)
770 return fd;
771 os_close_file(fd);
772 out:
773 return err;
774}
775
776static void ubd_close_dev(struct ubd *ubd_dev)
777{
778 os_close_file(ubd_dev->fd);
779 if(ubd_dev->cow.file == NULL)
780 return;
781
782 os_close_file(ubd_dev->cow.fd);
783 vfree(ubd_dev->cow.bitmap);
784 ubd_dev->cow.bitmap = NULL;
785}
786
787static int ubd_open_dev(struct ubd *ubd_dev)
788{
789 struct openflags flags;
790 char **back_ptr;
791 int err, create_cow, *create_ptr;
792 int fd;
793
794 ubd_dev->openflags = ubd_dev->boot_openflags;
795 create_cow = 0;
796 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
797 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
798
799 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
800 back_ptr, &ubd_dev->cow.bitmap_offset,
801 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
802 create_ptr);
803
804 if((fd == -ENOENT) && create_cow){
805 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
David Brazdil0f672f62019-12-10 10:32:29 +0000806 ubd_dev->openflags, SECTOR_SIZE, PAGE_SIZE,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000807 &ubd_dev->cow.bitmap_offset,
808 &ubd_dev->cow.bitmap_len,
809 &ubd_dev->cow.data_offset);
810 if(fd >= 0){
811 printk(KERN_INFO "Creating \"%s\" as COW file for "
812 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
813 }
814 }
815
816 if(fd < 0){
817 printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
818 -fd);
819 return fd;
820 }
821 ubd_dev->fd = fd;
822
823 if(ubd_dev->cow.file != NULL){
824 blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
825
826 err = -ENOMEM;
827 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
828 if(ubd_dev->cow.bitmap == NULL){
829 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
830 goto error;
831 }
832 flush_tlb_kernel_vm();
833
834 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
835 ubd_dev->cow.bitmap_offset,
836 ubd_dev->cow.bitmap_len);
837 if(err < 0)
838 goto error;
839
840 flags = ubd_dev->openflags;
841 flags.w = 0;
842 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
843 NULL, NULL, NULL, NULL);
844 if(err < 0) goto error;
845 ubd_dev->cow.fd = err;
846 }
David Brazdil0f672f62019-12-10 10:32:29 +0000847 if (ubd_dev->no_trim == 0) {
848 ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
849 ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
850 blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
851 blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
852 blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);
853 }
854 blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000855 return 0;
856 error:
857 os_close_file(ubd_dev->fd);
858 return err;
859}
860
861static void ubd_device_release(struct device *dev)
862{
863 struct ubd *ubd_dev = dev_get_drvdata(dev);
864
865 blk_cleanup_queue(ubd_dev->queue);
David Brazdil0f672f62019-12-10 10:32:29 +0000866 blk_mq_free_tag_set(&ubd_dev->tag_set);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000867 *ubd_dev = ((struct ubd) DEFAULT_UBD);
868}
869
870static int ubd_disk_register(int major, u64 size, int unit,
871 struct gendisk **disk_out)
872{
873 struct device *parent = NULL;
874 struct gendisk *disk;
875
876 disk = alloc_disk(1 << UBD_SHIFT);
877 if(disk == NULL)
878 return -ENOMEM;
879
880 disk->major = major;
881 disk->first_minor = unit << UBD_SHIFT;
882 disk->fops = &ubd_blops;
883 set_capacity(disk, size / 512);
884 if (major == UBD_MAJOR)
885 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
886 else
887 sprintf(disk->disk_name, "ubd_fake%d", unit);
888
889 /* sysfs register (not for ide fake devices) */
890 if (major == UBD_MAJOR) {
891 ubd_devs[unit].pdev.id = unit;
892 ubd_devs[unit].pdev.name = DRIVER_NAME;
893 ubd_devs[unit].pdev.dev.release = ubd_device_release;
894 dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
895 platform_device_register(&ubd_devs[unit].pdev);
896 parent = &ubd_devs[unit].pdev.dev;
897 }
898
899 disk->private_data = &ubd_devs[unit];
900 disk->queue = ubd_devs[unit].queue;
David Brazdil0f672f62019-12-10 10:32:29 +0000901 device_add_disk(parent, disk, NULL);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000902
903 *disk_out = disk;
904 return 0;
905}
906
/*
 * Round n up to the next multiple of SECTOR_SIZE. The argument is
 * parenthesized so that expressions such as (a | b) expand correctly.
 */
#define ROUND_BLOCK(n) (((n) + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE))
908
909static const struct blk_mq_ops ubd_mq_ops = {
910 .queue_rq = ubd_queue_rq,
911};
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000912
913static int ubd_add(int n, char **error_out)
914{
915 struct ubd *ubd_dev = &ubd_devs[n];
916 int err = 0;
917
918 if(ubd_dev->file == NULL)
919 goto out;
920
921 err = ubd_file_size(ubd_dev, &ubd_dev->size);
922 if(err < 0){
923 *error_out = "Couldn't determine size of device's file";
924 goto out;
925 }
926
927 ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
928
David Brazdil0f672f62019-12-10 10:32:29 +0000929 ubd_dev->tag_set.ops = &ubd_mq_ops;
930 ubd_dev->tag_set.queue_depth = 64;
931 ubd_dev->tag_set.numa_node = NUMA_NO_NODE;
932 ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
933 ubd_dev->tag_set.driver_data = ubd_dev;
934 ubd_dev->tag_set.nr_hw_queues = 1;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000935
David Brazdil0f672f62019-12-10 10:32:29 +0000936 err = blk_mq_alloc_tag_set(&ubd_dev->tag_set);
937 if (err)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000938 goto out;
David Brazdil0f672f62019-12-10 10:32:29 +0000939
940 ubd_dev->queue = blk_mq_init_queue(&ubd_dev->tag_set);
941 if (IS_ERR(ubd_dev->queue)) {
942 err = PTR_ERR(ubd_dev->queue);
943 goto out_cleanup_tags;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000944 }
David Brazdil0f672f62019-12-10 10:32:29 +0000945
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000946 ubd_dev->queue->queuedata = ubd_dev;
947 blk_queue_write_cache(ubd_dev->queue, true, false);
948
949 blk_queue_max_segments(ubd_dev->queue, MAX_SG);
Olivier Deprez0e641232021-09-23 10:07:05 +0200950 blk_queue_segment_boundary(ubd_dev->queue, PAGE_SIZE - 1);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000951 err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
952 if(err){
953 *error_out = "Failed to register device";
David Brazdil0f672f62019-12-10 10:32:29 +0000954 goto out_cleanup_tags;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000955 }
956
957 if (fake_major != UBD_MAJOR)
958 ubd_disk_register(fake_major, ubd_dev->size, n,
959 &fake_gendisk[n]);
960
961 /*
962 * Perhaps this should also be under the "if (fake_major)" above
963 * using the fake_disk->disk_name
964 */
965 if (fake_ide)
966 make_ide_entries(ubd_gendisk[n]->disk_name);
967
968 err = 0;
969out:
970 return err;
971
David Brazdil0f672f62019-12-10 10:32:29 +0000972out_cleanup_tags:
973 blk_mq_free_tag_set(&ubd_dev->tag_set);
974 if (!(IS_ERR(ubd_dev->queue)))
975 blk_cleanup_queue(ubd_dev->queue);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000976 goto out;
977}
978
979static int ubd_config(char *str, char **error_out)
980{
981 int n, ret;
982
983 /* This string is possibly broken up and stored, so it's only
984 * freed if ubd_setup_common fails, or if only general options
985 * were set.
986 */
987 str = kstrdup(str, GFP_KERNEL);
988 if (str == NULL) {
989 *error_out = "Failed to allocate memory";
990 return -ENOMEM;
991 }
992
993 ret = ubd_setup_common(str, &n, error_out);
994 if (ret)
995 goto err_free;
996
997 if (n == -1) {
998 ret = 0;
999 goto err_free;
1000 }
1001
1002 mutex_lock(&ubd_lock);
1003 ret = ubd_add(n, error_out);
1004 if (ret)
1005 ubd_devs[n].file = NULL;
1006 mutex_unlock(&ubd_lock);
1007
1008out:
1009 return ret;
1010
1011err_free:
1012 kfree(str);
1013 goto out;
1014}
1015
1016static int ubd_get_config(char *name, char *str, int size, char **error_out)
1017{
1018 struct ubd *ubd_dev;
1019 int n, len = 0;
1020
1021 n = parse_unit(&name);
1022 if((n >= MAX_DEV) || (n < 0)){
1023 *error_out = "ubd_get_config : device number out of range";
1024 return -1;
1025 }
1026
1027 ubd_dev = &ubd_devs[n];
1028 mutex_lock(&ubd_lock);
1029
1030 if(ubd_dev->file == NULL){
1031 CONFIG_CHUNK(str, size, len, "", 1);
1032 goto out;
1033 }
1034
1035 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
1036
1037 if(ubd_dev->cow.file != NULL){
1038 CONFIG_CHUNK(str, size, len, ",", 0);
1039 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
1040 }
1041 else CONFIG_CHUNK(str, size, len, "", 1);
1042
1043 out:
1044 mutex_unlock(&ubd_lock);
1045 return len;
1046}
1047
1048static int ubd_id(char **str, int *start_out, int *end_out)
1049{
1050 int n;
1051
1052 n = parse_unit(str);
1053 *start_out = 0;
1054 *end_out = MAX_DEV - 1;
1055 return n;
1056}
1057
1058static int ubd_remove(int n, char **error_out)
1059{
1060 struct gendisk *disk = ubd_gendisk[n];
1061 struct ubd *ubd_dev;
1062 int err = -ENODEV;
1063
1064 mutex_lock(&ubd_lock);
1065
1066 ubd_dev = &ubd_devs[n];
1067
1068 if(ubd_dev->file == NULL)
1069 goto out;
1070
1071 /* you cannot remove a open disk */
1072 err = -EBUSY;
1073 if(ubd_dev->count > 0)
1074 goto out;
1075
1076 ubd_gendisk[n] = NULL;
1077 if(disk != NULL){
1078 del_gendisk(disk);
1079 put_disk(disk);
1080 }
1081
1082 if(fake_gendisk[n] != NULL){
1083 del_gendisk(fake_gendisk[n]);
1084 put_disk(fake_gendisk[n]);
1085 fake_gendisk[n] = NULL;
1086 }
1087
1088 err = 0;
1089 platform_device_unregister(&ubd_dev->pdev);
1090out:
1091 mutex_unlock(&ubd_lock);
1092 return err;
1093}
1094
1095/* All these are called by mconsole in process context and without
1096 * ubd-specific locks. The structure itself is const except for .list.
1097 */
1098static struct mc_device ubd_mc = {
1099 .list = LIST_HEAD_INIT(ubd_mc.list),
1100 .name = "ubd",
1101 .config = ubd_config,
1102 .get_config = ubd_get_config,
1103 .id = ubd_id,
1104 .remove = ubd_remove,
1105};
1106
1107static int __init ubd_mc_init(void)
1108{
1109 mconsole_register_dev(&ubd_mc);
1110 return 0;
1111}
1112
1113__initcall(ubd_mc_init);
1114
1115static int __init ubd0_init(void)
1116{
1117 struct ubd *ubd_dev = &ubd_devs[0];
1118
1119 mutex_lock(&ubd_lock);
1120 if(ubd_dev->file == NULL)
1121 ubd_dev->file = "root_fs";
1122 mutex_unlock(&ubd_lock);
1123
1124 return 0;
1125}
1126
1127__initcall(ubd0_init);
1128
1129/* Used in ubd_init, which is an initcall */
1130static struct platform_driver ubd_driver = {
1131 .driver = {
1132 .name = DRIVER_NAME,
1133 },
1134};
1135
1136static int __init ubd_init(void)
1137{
1138 char *error;
1139 int i, err;
1140
1141 if (register_blkdev(UBD_MAJOR, "ubd"))
1142 return -1;
1143
1144 if (fake_major != UBD_MAJOR) {
1145 char name[sizeof("ubd_nnn\0")];
1146
1147 snprintf(name, sizeof(name), "ubd_%d", fake_major);
1148 if (register_blkdev(fake_major, "ubd"))
1149 return -1;
1150 }
1151
1152 irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1153 sizeof(struct io_thread_req *),
1154 GFP_KERNEL
1155 );
1156 irq_remainder = 0;
1157
1158 if (irq_req_buffer == NULL) {
1159 printk(KERN_ERR "Failed to initialize ubd buffering\n");
1160 return -1;
1161 }
1162 io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1163 sizeof(struct io_thread_req *),
1164 GFP_KERNEL
1165 );
1166
1167 io_remainder = 0;
1168
1169 if (io_req_buffer == NULL) {
1170 printk(KERN_ERR "Failed to initialize ubd buffering\n");
1171 return -1;
1172 }
1173 platform_driver_register(&ubd_driver);
1174 mutex_lock(&ubd_lock);
1175 for (i = 0; i < MAX_DEV; i++){
1176 err = ubd_add(i, &error);
1177 if(err)
1178 printk(KERN_ERR "Failed to initialize ubd device %d :"
1179 "%s\n", i, error);
1180 }
1181 mutex_unlock(&ubd_lock);
1182 return 0;
1183}
1184
1185late_initcall(ubd_init);
1186
1187static int __init ubd_driver_init(void){
1188 unsigned long stack;
1189 int err;
1190
1191 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1192 if(global_openflags.s){
1193 printk(KERN_INFO "ubd: Synchronous mode\n");
1194 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1195 * enough. So use anyway the io thread. */
1196 }
1197 stack = alloc_stack(0, 0);
1198 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
1199 &thread_fd);
1200 if(io_pid < 0){
1201 printk(KERN_ERR
1202 "ubd : Failed to start I/O thread (errno = %d) - "
1203 "falling back to synchronous I/O\n", -io_pid);
1204 io_pid = -1;
1205 return 0;
1206 }
1207 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
1208 0, "ubd", ubd_devs);
1209 if(err != 0)
1210 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
1211 return 0;
1212}
1213
1214device_initcall(ubd_driver_init);
1215
1216static int ubd_open(struct block_device *bdev, fmode_t mode)
1217{
1218 struct gendisk *disk = bdev->bd_disk;
1219 struct ubd *ubd_dev = disk->private_data;
1220 int err = 0;
1221
1222 mutex_lock(&ubd_mutex);
1223 if(ubd_dev->count == 0){
1224 err = ubd_open_dev(ubd_dev);
1225 if(err){
1226 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
1227 disk->disk_name, ubd_dev->file, -err);
1228 goto out;
1229 }
1230 }
1231 ubd_dev->count++;
1232 set_disk_ro(disk, !ubd_dev->openflags.w);
1233
1234 /* This should no more be needed. And it didn't work anyway to exclude
1235 * read-write remounting of filesystems.*/
1236 /*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
1237 if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1238 err = -EROFS;
1239 }*/
1240out:
1241 mutex_unlock(&ubd_mutex);
1242 return err;
1243}
1244
1245static void ubd_release(struct gendisk *disk, fmode_t mode)
1246{
1247 struct ubd *ubd_dev = disk->private_data;
1248
1249 mutex_lock(&ubd_mutex);
1250 if(--ubd_dev->count == 0)
1251 ubd_close_dev(ubd_dev);
1252 mutex_unlock(&ubd_mutex);
1253}
1254
1255static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1256 __u64 *cow_offset, unsigned long *bitmap,
1257 __u64 bitmap_offset, unsigned long *bitmap_words,
1258 __u64 bitmap_len)
1259{
David Brazdil0f672f62019-12-10 10:32:29 +00001260 __u64 sector = io_offset >> SECTOR_SHIFT;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001261 int i, update_bitmap = 0;
1262
David Brazdil0f672f62019-12-10 10:32:29 +00001263 for (i = 0; i < length >> SECTOR_SHIFT; i++) {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001264 if(cow_mask != NULL)
1265 ubd_set_bit(i, (unsigned char *) cow_mask);
1266 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1267 continue;
1268
1269 update_bitmap = 1;
1270 ubd_set_bit(sector + i, (unsigned char *) bitmap);
1271 }
1272
1273 if(!update_bitmap)
1274 return;
1275
1276 *cow_offset = sector / (sizeof(unsigned long) * 8);
1277
1278 /* This takes care of the case where we're exactly at the end of the
1279 * device, and *cow_offset + 1 is off the end. So, just back it up
1280 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
1281 * for the original diagnosis.
1282 */
1283 if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1284 sizeof(unsigned long)) - 1))
1285 (*cow_offset)--;
1286
1287 bitmap_words[0] = bitmap[*cow_offset];
1288 bitmap_words[1] = bitmap[*cow_offset + 1];
1289
1290 *cow_offset *= sizeof(unsigned long);
1291 *cow_offset += bitmap_offset;
1292}
1293
Olivier Deprez0e641232021-09-23 10:07:05 +02001294static void cowify_req(struct io_thread_req *req, struct io_desc *segment,
1295 unsigned long offset, unsigned long *bitmap,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001296 __u64 bitmap_offset, __u64 bitmap_len)
1297{
Olivier Deprez0e641232021-09-23 10:07:05 +02001298 __u64 sector = offset >> SECTOR_SHIFT;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001299 int i;
1300
Olivier Deprez0e641232021-09-23 10:07:05 +02001301 if (segment->length > (sizeof(segment->sector_mask) * 8) << SECTOR_SHIFT)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001302 panic("Operation too long");
1303
David Brazdil0f672f62019-12-10 10:32:29 +00001304 if (req_op(req->req) == REQ_OP_READ) {
Olivier Deprez0e641232021-09-23 10:07:05 +02001305 for (i = 0; i < segment->length >> SECTOR_SHIFT; i++) {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001306 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1307 ubd_set_bit(i, (unsigned char *)
Olivier Deprez0e641232021-09-23 10:07:05 +02001308 &segment->sector_mask);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001309 }
Olivier Deprez0e641232021-09-23 10:07:05 +02001310 } else {
1311 cowify_bitmap(offset, segment->length, &segment->sector_mask,
1312 &segment->cow_offset, bitmap, bitmap_offset,
1313 segment->bitmap_words, bitmap_len);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001314 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001315}
1316
Olivier Deprez0e641232021-09-23 10:07:05 +02001317static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req,
1318 struct request *req)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001319{
Olivier Deprez0e641232021-09-23 10:07:05 +02001320 struct bio_vec bvec;
1321 struct req_iterator iter;
1322 int i = 0;
1323 unsigned long byte_offset = io_req->offset;
1324 int op = req_op(req);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001325
Olivier Deprez0e641232021-09-23 10:07:05 +02001326 if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) {
1327 io_req->io_desc[0].buffer = NULL;
1328 io_req->io_desc[0].length = blk_rq_bytes(req);
1329 } else {
1330 rq_for_each_segment(bvec, req, iter) {
1331 BUG_ON(i >= io_req->desc_cnt);
1332
1333 io_req->io_desc[i].buffer =
1334 page_address(bvec.bv_page) + bvec.bv_offset;
1335 io_req->io_desc[i].length = bvec.bv_len;
1336 i++;
1337 }
1338 }
1339
1340 if (dev->cow.file) {
1341 for (i = 0; i < io_req->desc_cnt; i++) {
1342 cowify_req(io_req, &io_req->io_desc[i], byte_offset,
1343 dev->cow.bitmap, dev->cow.bitmap_offset,
1344 dev->cow.bitmap_len);
1345 byte_offset += io_req->io_desc[i].length;
1346 }
1347
1348 }
1349}
1350
1351static struct io_thread_req *ubd_alloc_req(struct ubd *dev, struct request *req,
1352 int desc_cnt)
1353{
1354 struct io_thread_req *io_req;
1355 int i;
1356
1357 io_req = kmalloc(sizeof(*io_req) +
1358 (desc_cnt * sizeof(struct io_desc)),
1359 GFP_ATOMIC);
David Brazdil0f672f62019-12-10 10:32:29 +00001360 if (!io_req)
Olivier Deprez0e641232021-09-23 10:07:05 +02001361 return NULL;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001362
David Brazdil0f672f62019-12-10 10:32:29 +00001363 io_req->req = req;
1364 if (dev->cow.file)
1365 io_req->fds[0] = dev->cow.fd;
1366 else
1367 io_req->fds[0] = dev->fd;
1368 io_req->error = 0;
David Brazdil0f672f62019-12-10 10:32:29 +00001369 io_req->sectorsize = SECTOR_SIZE;
1370 io_req->fds[1] = dev->fd;
Olivier Deprez0e641232021-09-23 10:07:05 +02001371 io_req->offset = (u64) blk_rq_pos(req) << SECTOR_SHIFT;
David Brazdil0f672f62019-12-10 10:32:29 +00001372 io_req->offsets[0] = 0;
1373 io_req->offsets[1] = dev->cow.data_offset;
1374
Olivier Deprez0e641232021-09-23 10:07:05 +02001375 for (i = 0 ; i < desc_cnt; i++) {
1376 io_req->io_desc[i].sector_mask = 0;
1377 io_req->io_desc[i].cow_offset = -1;
1378 }
1379
1380 return io_req;
1381}
1382
1383static int ubd_submit_request(struct ubd *dev, struct request *req)
1384{
1385 int segs = 0;
1386 struct io_thread_req *io_req;
1387 int ret;
1388 int op = req_op(req);
1389
1390 if (op == REQ_OP_FLUSH)
1391 segs = 0;
1392 else if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD)
1393 segs = 1;
1394 else
1395 segs = blk_rq_nr_phys_segments(req);
1396
1397 io_req = ubd_alloc_req(dev, req, segs);
1398 if (!io_req)
1399 return -ENOMEM;
1400
1401 io_req->desc_cnt = segs;
1402 if (segs)
1403 ubd_map_req(dev, io_req, req);
David Brazdil0f672f62019-12-10 10:32:29 +00001404
1405 ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
1406 if (ret != sizeof(io_req)) {
1407 if (ret != -EAGAIN)
1408 pr_err("write to io thread failed: %d\n", -ret);
1409 kfree(io_req);
1410 }
1411 return ret;
1412}
1413
David Brazdil0f672f62019-12-10 10:32:29 +00001414static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
1415 const struct blk_mq_queue_data *bd)
1416{
1417 struct ubd *ubd_dev = hctx->queue->queuedata;
1418 struct request *req = bd->rq;
1419 int ret = 0, res = BLK_STS_OK;
1420
1421 blk_mq_start_request(req);
1422
1423 spin_lock_irq(&ubd_dev->lock);
1424
1425 switch (req_op(req)) {
David Brazdil0f672f62019-12-10 10:32:29 +00001426 case REQ_OP_FLUSH:
David Brazdil0f672f62019-12-10 10:32:29 +00001427 case REQ_OP_READ:
1428 case REQ_OP_WRITE:
David Brazdil0f672f62019-12-10 10:32:29 +00001429 case REQ_OP_DISCARD:
1430 case REQ_OP_WRITE_ZEROES:
Olivier Deprez0e641232021-09-23 10:07:05 +02001431 ret = ubd_submit_request(ubd_dev, req);
David Brazdil0f672f62019-12-10 10:32:29 +00001432 break;
1433 default:
1434 WARN_ON_ONCE(1);
1435 res = BLK_STS_NOTSUPP;
1436 }
1437
1438 spin_unlock_irq(&ubd_dev->lock);
1439
1440 if (ret < 0) {
1441 if (ret == -ENOMEM)
1442 res = BLK_STS_RESOURCE;
1443 else
1444 res = BLK_STS_DEV_RESOURCE;
1445 }
1446
1447 return res;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001448}
1449
1450static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1451{
1452 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1453
1454 geo->heads = 128;
1455 geo->sectors = 32;
1456 geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1457 return 0;
1458}
1459
1460static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1461 unsigned int cmd, unsigned long arg)
1462{
1463 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1464 u16 ubd_id[ATA_ID_WORDS];
1465
1466 switch (cmd) {
1467 struct cdrom_volctrl volume;
1468 case HDIO_GET_IDENTITY:
1469 memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1470 ubd_id[ATA_ID_CYLS] = ubd_dev->size / (128 * 32 * 512);
1471 ubd_id[ATA_ID_HEADS] = 128;
1472 ubd_id[ATA_ID_SECTORS] = 32;
1473 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1474 sizeof(ubd_id)))
1475 return -EFAULT;
1476 return 0;
1477
1478 case CDROMVOLREAD:
1479 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1480 return -EFAULT;
1481 volume.channel0 = 255;
1482 volume.channel1 = 255;
1483 volume.channel2 = 255;
1484 volume.channel3 = 255;
1485 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1486 return -EFAULT;
1487 return 0;
1488 }
1489 return -EINVAL;
1490}
1491
David Brazdil0f672f62019-12-10 10:32:29 +00001492static int map_error(int error_code)
1493{
1494 switch (error_code) {
1495 case 0:
1496 return BLK_STS_OK;
1497 case ENOSYS:
1498 case EOPNOTSUPP:
1499 return BLK_STS_NOTSUPP;
1500 case ENOSPC:
1501 return BLK_STS_NOSPC;
1502 }
1503 return BLK_STS_IOERR;
1504}
1505
1506/*
1507 * Everything from here onwards *IS NOT PART OF THE KERNEL*
1508 *
1509 * The following functions are part of UML hypervisor code.
1510 * All functions from here onwards are executed as a helper
1511 * thread and are not allowed to execute any kernel functions.
1512 *
1513 * Any communication must occur strictly via shared memory and IPC.
1514 *
1515 * Do not add printks, locks, kernel memory operations, etc - it
1516 * will result in unpredictable behaviour and/or crashes.
1517 */
1518
Olivier Deprez0e641232021-09-23 10:07:05 +02001519static int update_bitmap(struct io_thread_req *req, struct io_desc *segment)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001520{
1521 int n;
1522
Olivier Deprez0e641232021-09-23 10:07:05 +02001523 if (segment->cow_offset == -1)
David Brazdil0f672f62019-12-10 10:32:29 +00001524 return map_error(0);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001525
Olivier Deprez0e641232021-09-23 10:07:05 +02001526 n = os_pwrite_file(req->fds[1], &segment->bitmap_words,
1527 sizeof(segment->bitmap_words), segment->cow_offset);
1528 if (n != sizeof(segment->bitmap_words))
David Brazdil0f672f62019-12-10 10:32:29 +00001529 return map_error(-n);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001530
David Brazdil0f672f62019-12-10 10:32:29 +00001531 return map_error(0);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001532}
1533
Olivier Deprez0e641232021-09-23 10:07:05 +02001534static void do_io(struct io_thread_req *req, struct io_desc *desc)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001535{
David Brazdil0f672f62019-12-10 10:32:29 +00001536 char *buf = NULL;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001537 unsigned long len;
1538 int n, nsectors, start, end, bit;
1539 __u64 off;
1540
David Brazdil0f672f62019-12-10 10:32:29 +00001541 /* FLUSH is really a special case, we cannot "case" it with others */
1542
1543 if (req_op(req->req) == REQ_OP_FLUSH) {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001544 /* fds[0] is always either the rw image or our cow file */
David Brazdil0f672f62019-12-10 10:32:29 +00001545 req->error = map_error(-os_sync_file(req->fds[0]));
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001546 return;
1547 }
1548
Olivier Deprez0e641232021-09-23 10:07:05 +02001549 nsectors = desc->length / req->sectorsize;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001550 start = 0;
1551 do {
Olivier Deprez0e641232021-09-23 10:07:05 +02001552 bit = ubd_test_bit(start, (unsigned char *) &desc->sector_mask);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001553 end = start;
1554 while((end < nsectors) &&
Olivier Deprez0e641232021-09-23 10:07:05 +02001555 (ubd_test_bit(end, (unsigned char *) &desc->sector_mask) == bit))
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001556 end++;
1557
1558 off = req->offset + req->offsets[bit] +
1559 start * req->sectorsize;
1560 len = (end - start) * req->sectorsize;
Olivier Deprez0e641232021-09-23 10:07:05 +02001561 if (desc->buffer != NULL)
1562 buf = &desc->buffer[start * req->sectorsize];
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001563
David Brazdil0f672f62019-12-10 10:32:29 +00001564 switch (req_op(req->req)) {
1565 case REQ_OP_READ:
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001566 n = 0;
1567 do {
1568 buf = &buf[n];
1569 len -= n;
1570 n = os_pread_file(req->fds[bit], buf, len, off);
1571 if (n < 0) {
David Brazdil0f672f62019-12-10 10:32:29 +00001572 req->error = map_error(-n);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001573 return;
1574 }
1575 } while((n < len) && (n != 0));
1576 if (n < len) memset(&buf[n], 0, len - n);
David Brazdil0f672f62019-12-10 10:32:29 +00001577 break;
1578 case REQ_OP_WRITE:
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001579 n = os_pwrite_file(req->fds[bit], buf, len, off);
1580 if(n != len){
David Brazdil0f672f62019-12-10 10:32:29 +00001581 req->error = map_error(-n);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001582 return;
1583 }
David Brazdil0f672f62019-12-10 10:32:29 +00001584 break;
1585 case REQ_OP_DISCARD:
1586 case REQ_OP_WRITE_ZEROES:
1587 n = os_falloc_punch(req->fds[bit], off, len);
1588 if (n) {
1589 req->error = map_error(-n);
1590 return;
1591 }
1592 break;
1593 default:
1594 WARN_ON_ONCE(1);
1595 req->error = BLK_STS_NOTSUPP;
1596 return;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001597 }
1598
1599 start = end;
1600 } while(start < nsectors);
1601
Olivier Deprez0e641232021-09-23 10:07:05 +02001602 req->offset += len;
1603 req->error = update_bitmap(req, desc);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001604}
1605
/* Changed in start_io_thread, which is serialized by being called only
 * from ubd_driver_init, which is an initcall.
 */
int kernel_fd = -1;

/* Only changed by the io thread, which increments it once per request it
 * processes.  XXX: the value is currently not consumed.
 */
static int io_count;
1613
1614int io_thread(void *arg)
1615{
1616 int n, count, written, res;
1617
1618 os_fix_helper_signals();
1619
1620 while(1){
1621 n = bulk_req_safe_read(
1622 kernel_fd,
1623 io_req_buffer,
1624 &io_remainder,
1625 &io_remainder_size,
1626 UBD_REQ_BUFFER_SIZE
1627 );
1628 if (n < 0) {
1629 if (n == -EAGAIN) {
1630 ubd_read_poll(-1);
1631 continue;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001632 }
1633 }
1634
1635 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
Olivier Deprez0e641232021-09-23 10:07:05 +02001636 struct io_thread_req *req = (*io_req_buffer)[count];
1637 int i;
1638
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001639 io_count++;
Olivier Deprez0e641232021-09-23 10:07:05 +02001640 for (i = 0; !req->error && i < req->desc_cnt; i++)
1641 do_io(req, &(req->io_desc[i]));
1642
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001643 }
1644
1645 written = 0;
1646
1647 do {
Olivier Deprez0e641232021-09-23 10:07:05 +02001648 res = os_write_file(kernel_fd,
1649 ((char *) io_req_buffer) + written,
1650 n - written);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001651 if (res >= 0) {
1652 written += res;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001653 }
1654 if (written < n) {
1655 ubd_write_poll(-1);
1656 }
1657 } while (written < n);
1658 }
1659
1660 return 0;
1661}