/*
 * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
 * Licensed under the GPL
 */
6
/* 2001-09-28...2002-04-17
 * Partition stuff by James_McMechan@hotmail.com
 * old style ubd by setting UBD_SHIFT to 0
 * 2002-09-27...2002-10-18 massive tinkering for 2.5
 * partitions have changed in 2.5
 * 2003-01-29 more tinkering for 2.5.59-1
 * This should now address the sysfs problems and has
 * the symlink for devfs to allow for booting with
 * the common /dev/ubd/discX/... names rather than
 * only /dev/ubdN/discN. This version also has lots of
 * clean-ups preparing for ubd-many.
 * James McMechan
 */
20
21#define UBD_SHIFT 4
22
23#include <linux/module.h>
24#include <linux/init.h>
25#include <linux/blkdev.h>
26#include <linux/ata.h>
27#include <linux/hdreg.h>
28#include <linux/cdrom.h>
29#include <linux/proc_fs.h>
30#include <linux/seq_file.h>
31#include <linux/ctype.h>
32#include <linux/slab.h>
33#include <linux/vmalloc.h>
34#include <linux/platform_device.h>
35#include <linux/scatterlist.h>
36#include <asm/tlbflush.h>
37#include <kern_util.h>
38#include "mconsole_kern.h"
39#include <init.h>
40#include <irq_kern.h>
41#include "ubd.h"
42#include <os.h>
43#include "cow.h"
44
/* Kind of operation carried by a struct io_thread_req. */
enum ubd_req {
	UBD_READ,
	UBD_WRITE,
	UBD_FLUSH
};

/*
 * One unit of work exchanged with the I/O thread.  Pointers to these
 * structures are what ubd_handler() reads back from thread_fd.
 */
struct io_thread_req {
	/* block layer request this piece of I/O belongs to */
	struct request *req;
	enum ubd_req op;
	/* fds[0]: backing (COW) fd if one is configured, else same as fds[1] */
	int fds[2];
	unsigned long offsets[2];
	/* byte offset of the I/O and its length in bytes */
	unsigned long long offset;
	unsigned long length;
	char *buffer;
	int sectorsize;
	/* one bit per 512-byte sector of this request, see cowify_req() */
	unsigned long sector_mask;
	/* file offset of the bitmap words to write back, -1 when unused */
	unsigned long long cow_offset;
	unsigned long bitmap_words[2];
	int error;
};
61
62
/*
 * Buffers of request pointers, allocated in ubd_init().  Each buffer has a
 * companion "remainder" slot whose storage holds the bytes of a partially
 * read pointer, plus the number of such bytes (see bulk_req_safe_read()).
 */

/* Used by ubd_handler() when reading completions from thread_fd. */
static struct io_thread_req *(*irq_req_buffer)[];
static struct io_thread_req *irq_remainder;
static int irq_remainder_size;

/* Second buffer of the same shape; presumably owned by the I/O thread. */
static struct io_thread_req *(*io_req_buffer)[];
static struct io_thread_req *io_remainder;
static int io_remainder_size;
70
71
72
/*
 * Return 1 if bit number @bit is set in the byte array @data, else 0.
 * Bit 0 is the least significant bit of data[0].
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int bits_per_elem = sizeof(data[0]) * 8;
	__u64 index = bit / bits_per_elem;
	int shift = bit % bits_per_elem;

	return (data[index] & (1 << shift)) != 0;
}
83
/*
 * Set bit number @bit in the byte array @data.
 * Bit 0 is the least significant bit of data[0].
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int bits_per_elem = sizeof(data[0]) * 8;
	__u64 index = bit / bits_per_elem;
	int shift = bit % bits_per_elem;

	data[index] |= (1 << shift);
}
94/*End stuff from ubd_user.h*/
95
96#define DRIVER_NAME "uml-blkdev"
97
98static DEFINE_MUTEX(ubd_lock);
99static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
100
101static int ubd_open(struct block_device *bdev, fmode_t mode);
102static void ubd_release(struct gendisk *disk, fmode_t mode);
103static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
104 unsigned int cmd, unsigned long arg);
105static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
106
107#define MAX_DEV (16)
108
109static const struct block_device_operations ubd_blops = {
110 .owner = THIS_MODULE,
111 .open = ubd_open,
112 .release = ubd_release,
113 .ioctl = ubd_ioctl,
114 .getgeo = ubd_getgeo,
115};
116
117/* Protected by ubd_lock */
118static int fake_major = UBD_MAJOR;
119static struct gendisk *ubd_gendisk[MAX_DEV];
120static struct gendisk *fake_gendisk[MAX_DEV];
121
122#ifdef CONFIG_BLK_DEV_UBD_SYNC
123#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
124 .cl = 1 })
125#else
126#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
127 .cl = 1 })
128#endif
129static struct openflags global_openflags = OPEN_FLAGS;
130
/* Per-device copy-on-write state. */
struct cow {
	/* backing file name */
	char *file;
	/* backing file fd */
	int fd;
	/* in-memory copy of the COW bitmap, vmalloc'ed in ubd_open_dev() */
	unsigned long *bitmap;
	/* size of the bitmap in bytes (it is the vmalloc/pread length) */
	unsigned long bitmap_len;
	/* offsets within the COW file, filled in when the file is opened */
	int bitmap_offset;
	int data_offset;
};
141
142#define MAX_SG 64
143
144struct ubd {
145 struct list_head restart;
146 /* name (and fd, below) of the file opened for writing, either the
147 * backing or the cow file. */
148 char *file;
149 int count;
150 int fd;
151 __u64 size;
152 struct openflags boot_openflags;
153 struct openflags openflags;
154 unsigned shared:1;
155 unsigned no_cow:1;
156 struct cow cow;
157 struct platform_device pdev;
158 struct request_queue *queue;
159 spinlock_t lock;
160 struct scatterlist sg[MAX_SG];
161 struct request *request;
162 int start_sg, end_sg;
163 sector_t rq_pos;
164};
165
/* Initialiser for an unconfigured struct cow: no file, invalid fd. */
#define DEFAULT_COW { \
	.file		= NULL, \
	.fd		= -1, \
	.bitmap		= NULL, \
	.bitmap_offset	= 0, \
	.data_offset	= 0, \
}

/*
 * Initialiser for an unconfigured struct ubd.  A NULL .file is what the
 * rest of the driver tests to decide that a slot is unused.
 */
#define DEFAULT_UBD { \
	.file		= NULL, \
	.count		= 0, \
	.fd		= -1, \
	.size		= -1, \
	.boot_openflags	= OPEN_FLAGS, \
	.openflags	= OPEN_FLAGS, \
	.no_cow		= 0, \
	.shared		= 0, \
	.cow		= DEFAULT_COW, \
	.lock		= __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
	.request	= NULL, \
	.start_sg	= 0, \
	.end_sg		= 0, \
	.rq_pos		= 0, \
}
190
191/* Protected by ubd_lock */
192static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
193
194/* Only changed by fake_ide_setup which is a setup */
195static int fake_ide = 0;
196static struct proc_dir_entry *proc_ide_root = NULL;
197static struct proc_dir_entry *proc_ide = NULL;
198
199static void make_proc_ide(void)
200{
201 proc_ide_root = proc_mkdir("ide", NULL);
202 proc_ide = proc_mkdir("ide0", proc_ide_root);
203}
204
/* seq_file show callback for the fake "media" entry: always "disk". */
static int fake_ide_media_proc_show(struct seq_file *m, void *v)
{
	seq_puts(m, "disk\n");

	return 0;
}
210
211static void make_ide_entries(const char *dev_name)
212{
213 struct proc_dir_entry *dir, *ent;
214 char name[64];
215
216 if(proc_ide_root == NULL) make_proc_ide();
217
218 dir = proc_mkdir(dev_name, proc_ide);
219 if(!dir) return;
220
221 ent = proc_create_single("media", S_IRUGO, dir,
222 fake_ide_media_proc_show);
223 if(!ent) return;
224 snprintf(name, sizeof(name), "ide0/%s", dev_name);
225 proc_symlink(dev_name, proc_ide_root, name);
226}
227
228static int fake_ide_setup(char *str)
229{
230 fake_ide = 1;
231 return 1;
232}
233
234__setup("fake_ide", fake_ide_setup);
235
236__uml_help(fake_ide_setup,
237"fake_ide\n"
238" Create ide0 entries that map onto ubd devices.\n\n"
239);
240
/*
 * Parse a unit designator at *ptr: either a number (any base accepted by
 * simple_strtoul with base 0) or a single letter 'a'..'z' meaning 0..25.
 * On success *ptr is advanced past the designator and the unit is
 * returned; otherwise -1 is returned and *ptr is left alone.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr;
	char *end;
	int unit;

	if (isdigit(*cur)) {
		unit = simple_strtoul(cur, &end, 0);
		if (end == cur)
			return -1;
		*ptr = end;
		return unit;
	}

	if ((*cur >= 'a') && (*cur <= 'z')) {
		*ptr = cur + 1;
		return *cur - 'a';
	}

	return -1;
}
259
260/* If *index_out == -1 at exit, the passed option was a general one;
261 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
262 * should not be freed on exit.
263 */
264static int ubd_setup_common(char *str, int *index_out, char **error_out)
265{
266 struct ubd *ubd_dev;
267 struct openflags flags = global_openflags;
268 char *backing_file;
269 int n, err = 0, i;
270
271 if(index_out) *index_out = -1;
272 n = *str;
273 if(n == '='){
274 char *end;
275 int major;
276
277 str++;
278 if(!strcmp(str, "sync")){
279 global_openflags = of_sync(global_openflags);
280 goto out1;
281 }
282
283 err = -EINVAL;
284 major = simple_strtoul(str, &end, 0);
285 if((*end != '\0') || (end == str)){
286 *error_out = "Didn't parse major number";
287 goto out1;
288 }
289
290 mutex_lock(&ubd_lock);
291 if (fake_major != UBD_MAJOR) {
292 *error_out = "Can't assign a fake major twice";
293 goto out1;
294 }
295
296 fake_major = major;
297
298 printk(KERN_INFO "Setting extra ubd major number to %d\n",
299 major);
300 err = 0;
301 out1:
302 mutex_unlock(&ubd_lock);
303 return err;
304 }
305
306 n = parse_unit(&str);
307 if(n < 0){
308 *error_out = "Couldn't parse device number";
309 return -EINVAL;
310 }
311 if(n >= MAX_DEV){
312 *error_out = "Device number out of range";
313 return 1;
314 }
315
316 err = -EBUSY;
317 mutex_lock(&ubd_lock);
318
319 ubd_dev = &ubd_devs[n];
320 if(ubd_dev->file != NULL){
321 *error_out = "Device is already configured";
322 goto out;
323 }
324
325 if (index_out)
326 *index_out = n;
327
328 err = -EINVAL;
329 for (i = 0; i < sizeof("rscd="); i++) {
330 switch (*str) {
331 case 'r':
332 flags.w = 0;
333 break;
334 case 's':
335 flags.s = 1;
336 break;
337 case 'd':
338 ubd_dev->no_cow = 1;
339 break;
340 case 'c':
341 ubd_dev->shared = 1;
342 break;
343 case '=':
344 str++;
345 goto break_loop;
346 default:
347 *error_out = "Expected '=' or flag letter "
348 "(r, s, c, or d)";
349 goto out;
350 }
351 str++;
352 }
353
354 if (*str == '=')
355 *error_out = "Too many flags specified";
356 else
357 *error_out = "Missing '='";
358 goto out;
359
360break_loop:
361 backing_file = strchr(str, ',');
362
363 if (backing_file == NULL)
364 backing_file = strchr(str, ':');
365
366 if(backing_file != NULL){
367 if(ubd_dev->no_cow){
368 *error_out = "Can't specify both 'd' and a cow file";
369 goto out;
370 }
371 else {
372 *backing_file = '\0';
373 backing_file++;
374 }
375 }
376 err = 0;
377 ubd_dev->file = str;
378 ubd_dev->cow.file = backing_file;
379 ubd_dev->boot_openflags = flags;
380out:
381 mutex_unlock(&ubd_lock);
382 return err;
383}
384
385static int ubd_setup(char *str)
386{
387 char *error;
388 int err;
389
390 err = ubd_setup_common(str, NULL, &error);
391 if(err)
392 printk(KERN_ERR "Failed to initialize device with \"%s\" : "
393 "%s\n", str, error);
394 return 1;
395}
396
397__setup("ubd", ubd_setup);
398__uml_help(ubd_setup,
399"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
400" This is used to associate a device with a file in the underlying\n"
401" filesystem. When specifying two filenames, the first one is the\n"
402" COW name and the second is the backing file name. As separator you can\n"
403" use either a ':' or a ',': the first one allows writing things like;\n"
404" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
405" while with a ',' the shell would not expand the 2nd '~'.\n"
406" When using only one filename, UML will detect whether to treat it like\n"
407" a COW file or a backing file. To override this detection, add the 'd'\n"
408" flag:\n"
409" ubd0d=BackingFile\n"
410" Usually, there is a filesystem in the file, but \n"
411" that's not required. Swap devices containing swap files can be\n"
412" specified like this. Also, a file which doesn't contain a\n"
413" filesystem can have its contents read in the virtual \n"
414" machine by running 'dd' on the device. <n> must be in the range\n"
415" 0 to 7. Appending an 'r' to the number will cause that device\n"
416" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
417" an 's' will cause data to be written to disk on the host immediately.\n"
418" 'c' will cause the device to be treated as being shared between multiple\n"
419" UMLs and file locking will be turned off - this is appropriate for a\n"
420" cluster filesystem and inappropriate at almost all other times.\n\n"
421);
422
423static int udb_setup(char *str)
424{
425 printk("udb%s specified on command line is almost certainly a ubd -> "
426 "udb TYPO\n", str);
427 return 1;
428}
429
430__setup("udb", udb_setup);
431__uml_help(udb_setup,
432"udb\n"
433" This option is here solely to catch ubd -> udb typos, which can be\n"
434" to impossible to catch visually unless you specifically look for\n"
435" them. The only result of any option starting with 'udb' is an error\n"
436" in the boot output.\n\n"
437);
438
/* Request function of every ubd queue; defined further down. */
static void do_ubd_request(struct request_queue *q);

/* Only changed by ubd_init, which is an initcall. */
static int thread_fd = -1;

/* Devices whose queues ubd_handler() should run again. */
static LIST_HEAD(restart);
444
445/* Function to read several request pointers at a time
446* handling fractional reads if (and as) needed
447*/
448
449static int bulk_req_safe_read(
450 int fd,
451 struct io_thread_req * (*request_buffer)[],
452 struct io_thread_req **remainder,
453 int *remainder_size,
454 int max_recs
455 )
456{
457 int n = 0;
458 int res = 0;
459
460 if (*remainder_size > 0) {
461 memmove(
462 (char *) request_buffer,
463 (char *) remainder, *remainder_size
464 );
465 n = *remainder_size;
466 }
467
468 res = os_read_file(
469 fd,
470 ((char *) request_buffer) + *remainder_size,
471 sizeof(struct io_thread_req *)*max_recs
472 - *remainder_size
473 );
474 if (res > 0) {
475 n += res;
476 if ((n % sizeof(struct io_thread_req *)) > 0) {
477 /*
478 * Read somehow returned not a multiple of dword
479 * theoretically possible, but never observed in the
480 * wild, so read routine must be able to handle it
481 */
482 *remainder_size = n % sizeof(struct io_thread_req *);
483 WARN(*remainder_size > 0, "UBD IPC read returned a partial result");
484 memmove(
485 remainder,
486 ((char *) request_buffer) +
487 (n/sizeof(struct io_thread_req *))*sizeof(struct io_thread_req *),
488 *remainder_size
489 );
490 n = n - *remainder_size;
491 }
492 } else {
493 n = res;
494 }
495 return n;
496}
497
498/* Called without dev->lock held, and only in interrupt context. */
499static void ubd_handler(void)
500{
501 struct ubd *ubd;
502 struct list_head *list, *next_ele;
503 unsigned long flags;
504 int n;
505 int count;
506
507 while(1){
508 n = bulk_req_safe_read(
509 thread_fd,
510 irq_req_buffer,
511 &irq_remainder,
512 &irq_remainder_size,
513 UBD_REQ_BUFFER_SIZE
514 );
515 if (n < 0) {
516 if(n == -EAGAIN)
517 break;
518 printk(KERN_ERR "spurious interrupt in ubd_handler, "
519 "err = %d\n", -n);
520 return;
521 }
522 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
523 blk_end_request(
524 (*irq_req_buffer)[count]->req,
525 BLK_STS_OK,
526 (*irq_req_buffer)[count]->length
527 );
528 kfree((*irq_req_buffer)[count]);
529 }
530 }
531 reactivate_fd(thread_fd, UBD_IRQ);
532
533 list_for_each_safe(list, next_ele, &restart){
534 ubd = container_of(list, struct ubd, restart);
535 list_del_init(&ubd->restart);
536 spin_lock_irqsave(&ubd->lock, flags);
537 do_ubd_request(ubd->queue);
538 spin_unlock_irqrestore(&ubd->lock, flags);
539 }
540}
541
542static irqreturn_t ubd_intr(int irq, void *dev)
543{
544 ubd_handler();
545 return IRQ_HANDLED;
546}
547
/* Only changed by ubd_init, which is an initcall. */
static int io_pid = -1;

/* Exit hook: kill the I/O thread if one was started. */
static void kill_io_thread(void)
{
	if (io_pid == -1)
		return;

	os_kill_process(io_pid, 1);
}

__uml_exitcall(kill_io_thread);
558
559static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
560{
561 char *file;
562 int fd;
563 int err;
564
565 __u32 version;
566 __u32 align;
567 char *backing_file;
568 time_t mtime;
569 unsigned long long size;
570 int sector_size;
571 int bitmap_offset;
572
573 if (ubd_dev->file && ubd_dev->cow.file) {
574 file = ubd_dev->cow.file;
575
576 goto out;
577 }
578
579 fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
580 if (fd < 0)
581 return fd;
582
583 err = read_cow_header(file_reader, &fd, &version, &backing_file, \
584 &mtime, &size, &sector_size, &align, &bitmap_offset);
585 os_close_file(fd);
586
587 if(err == -EINVAL)
588 file = ubd_dev->file;
589 else
590 file = backing_file;
591
592out:
593 return os_file_size(file, size_out);
594}
595
/*
 * Read @len bytes at @offset of @fd into @buf.
 * Returns 0 on success or the negative os_pread_file() error.
 */
static int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret = os_pread_file(fd, buf, len, offset);

	return (ret < 0) ? ret : 0;
}
606
607static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
608{
609 unsigned long modtime;
610 unsigned long long actual;
611 int err;
612
613 err = os_file_modtime(file, &modtime);
614 if (err < 0) {
615 printk(KERN_ERR "Failed to get modification time of backing "
616 "file \"%s\", err = %d\n", file, -err);
617 return err;
618 }
619
620 err = os_file_size(file, &actual);
621 if (err < 0) {
622 printk(KERN_ERR "Failed to get size of backing file \"%s\", "
623 "err = %d\n", file, -err);
624 return err;
625 }
626
627 if (actual != size) {
628 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
629 * the typecast.*/
630 printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
631 "vs backing file\n", (unsigned long long) size, actual);
632 return -EINVAL;
633 }
634 if (modtime != mtime) {
635 printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs "
636 "backing file\n", mtime, modtime);
637 return -EINVAL;
638 }
639 return 0;
640}
641
642static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
643{
644 struct uml_stat buf1, buf2;
645 int err;
646
647 if (from_cmdline == NULL)
648 return 0;
649 if (!strcmp(from_cmdline, from_cow))
650 return 0;
651
652 err = os_stat_file(from_cmdline, &buf1);
653 if (err < 0) {
654 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
655 -err);
656 return 0;
657 }
658 err = os_stat_file(from_cow, &buf2);
659 if (err < 0) {
660 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
661 -err);
662 return 1;
663 }
664 if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
665 return 0;
666
667 printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
668 "\"%s\" specified in COW header of \"%s\"\n",
669 from_cmdline, from_cow, cow);
670 return 1;
671}
672
673static int open_ubd_file(char *file, struct openflags *openflags, int shared,
674 char **backing_file_out, int *bitmap_offset_out,
675 unsigned long *bitmap_len_out, int *data_offset_out,
676 int *create_cow_out)
677{
678 time_t mtime;
679 unsigned long long size;
680 __u32 version, align;
681 char *backing_file;
682 int fd, err, sectorsize, asked_switch, mode = 0644;
683
684 fd = os_open_file(file, *openflags, mode);
685 if (fd < 0) {
686 if ((fd == -ENOENT) && (create_cow_out != NULL))
687 *create_cow_out = 1;
688 if (!openflags->w ||
689 ((fd != -EROFS) && (fd != -EACCES)))
690 return fd;
691 openflags->w = 0;
692 fd = os_open_file(file, *openflags, mode);
693 if (fd < 0)
694 return fd;
695 }
696
697 if (shared)
698 printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
699 else {
700 err = os_lock_file(fd, openflags->w);
701 if (err < 0) {
702 printk(KERN_ERR "Failed to lock '%s', err = %d\n",
703 file, -err);
704 goto out_close;
705 }
706 }
707
708 /* Successful return case! */
709 if (backing_file_out == NULL)
710 return fd;
711
712 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
713 &size, &sectorsize, &align, bitmap_offset_out);
714 if (err && (*backing_file_out != NULL)) {
715 printk(KERN_ERR "Failed to read COW header from COW file "
716 "\"%s\", errno = %d\n", file, -err);
717 goto out_close;
718 }
719 if (err)
720 return fd;
721
722 asked_switch = path_requires_switch(*backing_file_out, backing_file,
723 file);
724
725 /* Allow switching only if no mismatch. */
726 if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
727 mtime)) {
728 printk(KERN_ERR "Switching backing file to '%s'\n",
729 *backing_file_out);
730 err = write_cow_header(file, fd, *backing_file_out,
731 sectorsize, align, &size);
732 if (err) {
733 printk(KERN_ERR "Switch failed, errno = %d\n", -err);
734 goto out_close;
735 }
736 } else {
737 *backing_file_out = backing_file;
738 err = backing_file_mismatch(*backing_file_out, size, mtime);
739 if (err)
740 goto out_close;
741 }
742
743 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
744 bitmap_len_out, data_offset_out);
745
746 return fd;
747 out_close:
748 os_close_file(fd);
749 return err;
750}
751
752static int create_cow_file(char *cow_file, char *backing_file,
753 struct openflags flags,
754 int sectorsize, int alignment, int *bitmap_offset_out,
755 unsigned long *bitmap_len_out, int *data_offset_out)
756{
757 int err, fd;
758
759 flags.c = 1;
760 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
761 if (fd < 0) {
762 err = fd;
763 printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
764 cow_file, -err);
765 goto out;
766 }
767
768 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
769 bitmap_offset_out, bitmap_len_out,
770 data_offset_out);
771 if (!err)
772 return fd;
773 os_close_file(fd);
774 out:
775 return err;
776}
777
778static void ubd_close_dev(struct ubd *ubd_dev)
779{
780 os_close_file(ubd_dev->fd);
781 if(ubd_dev->cow.file == NULL)
782 return;
783
784 os_close_file(ubd_dev->cow.fd);
785 vfree(ubd_dev->cow.bitmap);
786 ubd_dev->cow.bitmap = NULL;
787}
788
789static int ubd_open_dev(struct ubd *ubd_dev)
790{
791 struct openflags flags;
792 char **back_ptr;
793 int err, create_cow, *create_ptr;
794 int fd;
795
796 ubd_dev->openflags = ubd_dev->boot_openflags;
797 create_cow = 0;
798 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
799 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
800
801 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
802 back_ptr, &ubd_dev->cow.bitmap_offset,
803 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
804 create_ptr);
805
806 if((fd == -ENOENT) && create_cow){
807 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
808 ubd_dev->openflags, 1 << 9, PAGE_SIZE,
809 &ubd_dev->cow.bitmap_offset,
810 &ubd_dev->cow.bitmap_len,
811 &ubd_dev->cow.data_offset);
812 if(fd >= 0){
813 printk(KERN_INFO "Creating \"%s\" as COW file for "
814 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
815 }
816 }
817
818 if(fd < 0){
819 printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
820 -fd);
821 return fd;
822 }
823 ubd_dev->fd = fd;
824
825 if(ubd_dev->cow.file != NULL){
826 blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
827
828 err = -ENOMEM;
829 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
830 if(ubd_dev->cow.bitmap == NULL){
831 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
832 goto error;
833 }
834 flush_tlb_kernel_vm();
835
836 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
837 ubd_dev->cow.bitmap_offset,
838 ubd_dev->cow.bitmap_len);
839 if(err < 0)
840 goto error;
841
842 flags = ubd_dev->openflags;
843 flags.w = 0;
844 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
845 NULL, NULL, NULL, NULL);
846 if(err < 0) goto error;
847 ubd_dev->cow.fd = err;
848 }
849 return 0;
850 error:
851 os_close_file(ubd_dev->fd);
852 return err;
853}
854
855static void ubd_device_release(struct device *dev)
856{
857 struct ubd *ubd_dev = dev_get_drvdata(dev);
858
859 blk_cleanup_queue(ubd_dev->queue);
860 *ubd_dev = ((struct ubd) DEFAULT_UBD);
861}
862
863static int ubd_disk_register(int major, u64 size, int unit,
864 struct gendisk **disk_out)
865{
866 struct device *parent = NULL;
867 struct gendisk *disk;
868
869 disk = alloc_disk(1 << UBD_SHIFT);
870 if(disk == NULL)
871 return -ENOMEM;
872
873 disk->major = major;
874 disk->first_minor = unit << UBD_SHIFT;
875 disk->fops = &ubd_blops;
876 set_capacity(disk, size / 512);
877 if (major == UBD_MAJOR)
878 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
879 else
880 sprintf(disk->disk_name, "ubd_fake%d", unit);
881
882 /* sysfs register (not for ide fake devices) */
883 if (major == UBD_MAJOR) {
884 ubd_devs[unit].pdev.id = unit;
885 ubd_devs[unit].pdev.name = DRIVER_NAME;
886 ubd_devs[unit].pdev.dev.release = ubd_device_release;
887 dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
888 platform_device_register(&ubd_devs[unit].pdev);
889 parent = &ubd_devs[unit].pdev.dev;
890 }
891
892 disk->private_data = &ubd_devs[unit];
893 disk->queue = ubd_devs[unit].queue;
894 device_add_disk(parent, disk);
895
896 *disk_out = disk;
897 return 0;
898}
899
/*
 * Round n up to the next multiple of 512 (1 << 9).  The argument is
 * parenthesised so that any expression may be passed, and the mask is
 * built with ~ rather than by left-shifting a negative value.
 */
#define ROUND_BLOCK(n) (((n) + ((1 << 9) - 1)) & ~((1 << 9) - 1))
901
902static int ubd_add(int n, char **error_out)
903{
904 struct ubd *ubd_dev = &ubd_devs[n];
905 int err = 0;
906
907 if(ubd_dev->file == NULL)
908 goto out;
909
910 err = ubd_file_size(ubd_dev, &ubd_dev->size);
911 if(err < 0){
912 *error_out = "Couldn't determine size of device's file";
913 goto out;
914 }
915
916 ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
917
918 INIT_LIST_HEAD(&ubd_dev->restart);
919 sg_init_table(ubd_dev->sg, MAX_SG);
920
921 err = -ENOMEM;
922 ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock);
923 if (ubd_dev->queue == NULL) {
924 *error_out = "Failed to initialize device queue";
925 goto out;
926 }
927 ubd_dev->queue->queuedata = ubd_dev;
928 blk_queue_write_cache(ubd_dev->queue, true, false);
929
930 blk_queue_max_segments(ubd_dev->queue, MAX_SG);
931 err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
932 if(err){
933 *error_out = "Failed to register device";
934 goto out_cleanup;
935 }
936
937 if (fake_major != UBD_MAJOR)
938 ubd_disk_register(fake_major, ubd_dev->size, n,
939 &fake_gendisk[n]);
940
941 /*
942 * Perhaps this should also be under the "if (fake_major)" above
943 * using the fake_disk->disk_name
944 */
945 if (fake_ide)
946 make_ide_entries(ubd_gendisk[n]->disk_name);
947
948 err = 0;
949out:
950 return err;
951
952out_cleanup:
953 blk_cleanup_queue(ubd_dev->queue);
954 goto out;
955}
956
957static int ubd_config(char *str, char **error_out)
958{
959 int n, ret;
960
961 /* This string is possibly broken up and stored, so it's only
962 * freed if ubd_setup_common fails, or if only general options
963 * were set.
964 */
965 str = kstrdup(str, GFP_KERNEL);
966 if (str == NULL) {
967 *error_out = "Failed to allocate memory";
968 return -ENOMEM;
969 }
970
971 ret = ubd_setup_common(str, &n, error_out);
972 if (ret)
973 goto err_free;
974
975 if (n == -1) {
976 ret = 0;
977 goto err_free;
978 }
979
980 mutex_lock(&ubd_lock);
981 ret = ubd_add(n, error_out);
982 if (ret)
983 ubd_devs[n].file = NULL;
984 mutex_unlock(&ubd_lock);
985
986out:
987 return ret;
988
989err_free:
990 kfree(str);
991 goto out;
992}
993
994static int ubd_get_config(char *name, char *str, int size, char **error_out)
995{
996 struct ubd *ubd_dev;
997 int n, len = 0;
998
999 n = parse_unit(&name);
1000 if((n >= MAX_DEV) || (n < 0)){
1001 *error_out = "ubd_get_config : device number out of range";
1002 return -1;
1003 }
1004
1005 ubd_dev = &ubd_devs[n];
1006 mutex_lock(&ubd_lock);
1007
1008 if(ubd_dev->file == NULL){
1009 CONFIG_CHUNK(str, size, len, "", 1);
1010 goto out;
1011 }
1012
1013 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
1014
1015 if(ubd_dev->cow.file != NULL){
1016 CONFIG_CHUNK(str, size, len, ",", 0);
1017 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
1018 }
1019 else CONFIG_CHUNK(str, size, len, "", 1);
1020
1021 out:
1022 mutex_unlock(&ubd_lock);
1023 return len;
1024}
1025
1026static int ubd_id(char **str, int *start_out, int *end_out)
1027{
1028 int n;
1029
1030 n = parse_unit(str);
1031 *start_out = 0;
1032 *end_out = MAX_DEV - 1;
1033 return n;
1034}
1035
1036static int ubd_remove(int n, char **error_out)
1037{
1038 struct gendisk *disk = ubd_gendisk[n];
1039 struct ubd *ubd_dev;
1040 int err = -ENODEV;
1041
1042 mutex_lock(&ubd_lock);
1043
1044 ubd_dev = &ubd_devs[n];
1045
1046 if(ubd_dev->file == NULL)
1047 goto out;
1048
1049 /* you cannot remove a open disk */
1050 err = -EBUSY;
1051 if(ubd_dev->count > 0)
1052 goto out;
1053
1054 ubd_gendisk[n] = NULL;
1055 if(disk != NULL){
1056 del_gendisk(disk);
1057 put_disk(disk);
1058 }
1059
1060 if(fake_gendisk[n] != NULL){
1061 del_gendisk(fake_gendisk[n]);
1062 put_disk(fake_gendisk[n]);
1063 fake_gendisk[n] = NULL;
1064 }
1065
1066 err = 0;
1067 platform_device_unregister(&ubd_dev->pdev);
1068out:
1069 mutex_unlock(&ubd_lock);
1070 return err;
1071}
1072
1073/* All these are called by mconsole in process context and without
1074 * ubd-specific locks. The structure itself is const except for .list.
1075 */
1076static struct mc_device ubd_mc = {
1077 .list = LIST_HEAD_INIT(ubd_mc.list),
1078 .name = "ubd",
1079 .config = ubd_config,
1080 .get_config = ubd_get_config,
1081 .id = ubd_id,
1082 .remove = ubd_remove,
1083};
1084
1085static int __init ubd_mc_init(void)
1086{
1087 mconsole_register_dev(&ubd_mc);
1088 return 0;
1089}
1090
1091__initcall(ubd_mc_init);
1092
1093static int __init ubd0_init(void)
1094{
1095 struct ubd *ubd_dev = &ubd_devs[0];
1096
1097 mutex_lock(&ubd_lock);
1098 if(ubd_dev->file == NULL)
1099 ubd_dev->file = "root_fs";
1100 mutex_unlock(&ubd_lock);
1101
1102 return 0;
1103}
1104
1105__initcall(ubd0_init);
1106
1107/* Used in ubd_init, which is an initcall */
1108static struct platform_driver ubd_driver = {
1109 .driver = {
1110 .name = DRIVER_NAME,
1111 },
1112};
1113
1114static int __init ubd_init(void)
1115{
1116 char *error;
1117 int i, err;
1118
1119 if (register_blkdev(UBD_MAJOR, "ubd"))
1120 return -1;
1121
1122 if (fake_major != UBD_MAJOR) {
1123 char name[sizeof("ubd_nnn\0")];
1124
1125 snprintf(name, sizeof(name), "ubd_%d", fake_major);
1126 if (register_blkdev(fake_major, "ubd"))
1127 return -1;
1128 }
1129
1130 irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1131 sizeof(struct io_thread_req *),
1132 GFP_KERNEL
1133 );
1134 irq_remainder = 0;
1135
1136 if (irq_req_buffer == NULL) {
1137 printk(KERN_ERR "Failed to initialize ubd buffering\n");
1138 return -1;
1139 }
1140 io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1141 sizeof(struct io_thread_req *),
1142 GFP_KERNEL
1143 );
1144
1145 io_remainder = 0;
1146
1147 if (io_req_buffer == NULL) {
1148 printk(KERN_ERR "Failed to initialize ubd buffering\n");
1149 return -1;
1150 }
1151 platform_driver_register(&ubd_driver);
1152 mutex_lock(&ubd_lock);
1153 for (i = 0; i < MAX_DEV; i++){
1154 err = ubd_add(i, &error);
1155 if(err)
1156 printk(KERN_ERR "Failed to initialize ubd device %d :"
1157 "%s\n", i, error);
1158 }
1159 mutex_unlock(&ubd_lock);
1160 return 0;
1161}
1162
1163late_initcall(ubd_init);
1164
1165static int __init ubd_driver_init(void){
1166 unsigned long stack;
1167 int err;
1168
1169 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1170 if(global_openflags.s){
1171 printk(KERN_INFO "ubd: Synchronous mode\n");
1172 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1173 * enough. So use anyway the io thread. */
1174 }
1175 stack = alloc_stack(0, 0);
1176 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
1177 &thread_fd);
1178 if(io_pid < 0){
1179 printk(KERN_ERR
1180 "ubd : Failed to start I/O thread (errno = %d) - "
1181 "falling back to synchronous I/O\n", -io_pid);
1182 io_pid = -1;
1183 return 0;
1184 }
1185 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
1186 0, "ubd", ubd_devs);
1187 if(err != 0)
1188 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
1189 return 0;
1190}
1191
1192device_initcall(ubd_driver_init);
1193
1194static int ubd_open(struct block_device *bdev, fmode_t mode)
1195{
1196 struct gendisk *disk = bdev->bd_disk;
1197 struct ubd *ubd_dev = disk->private_data;
1198 int err = 0;
1199
1200 mutex_lock(&ubd_mutex);
1201 if(ubd_dev->count == 0){
1202 err = ubd_open_dev(ubd_dev);
1203 if(err){
1204 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
1205 disk->disk_name, ubd_dev->file, -err);
1206 goto out;
1207 }
1208 }
1209 ubd_dev->count++;
1210 set_disk_ro(disk, !ubd_dev->openflags.w);
1211
1212 /* This should no more be needed. And it didn't work anyway to exclude
1213 * read-write remounting of filesystems.*/
1214 /*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
1215 if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1216 err = -EROFS;
1217 }*/
1218out:
1219 mutex_unlock(&ubd_mutex);
1220 return err;
1221}
1222
1223static void ubd_release(struct gendisk *disk, fmode_t mode)
1224{
1225 struct ubd *ubd_dev = disk->private_data;
1226
1227 mutex_lock(&ubd_mutex);
1228 if(--ubd_dev->count == 0)
1229 ubd_close_dev(ubd_dev);
1230 mutex_unlock(&ubd_mutex);
1231}
1232
1233static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1234 __u64 *cow_offset, unsigned long *bitmap,
1235 __u64 bitmap_offset, unsigned long *bitmap_words,
1236 __u64 bitmap_len)
1237{
1238 __u64 sector = io_offset >> 9;
1239 int i, update_bitmap = 0;
1240
1241 for(i = 0; i < length >> 9; i++){
1242 if(cow_mask != NULL)
1243 ubd_set_bit(i, (unsigned char *) cow_mask);
1244 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1245 continue;
1246
1247 update_bitmap = 1;
1248 ubd_set_bit(sector + i, (unsigned char *) bitmap);
1249 }
1250
1251 if(!update_bitmap)
1252 return;
1253
1254 *cow_offset = sector / (sizeof(unsigned long) * 8);
1255
1256 /* This takes care of the case where we're exactly at the end of the
1257 * device, and *cow_offset + 1 is off the end. So, just back it up
1258 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
1259 * for the original diagnosis.
1260 */
1261 if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1262 sizeof(unsigned long)) - 1))
1263 (*cow_offset)--;
1264
1265 bitmap_words[0] = bitmap[*cow_offset];
1266 bitmap_words[1] = bitmap[*cow_offset + 1];
1267
1268 *cow_offset *= sizeof(unsigned long);
1269 *cow_offset += bitmap_offset;
1270}
1271
1272static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
1273 __u64 bitmap_offset, __u64 bitmap_len)
1274{
1275 __u64 sector = req->offset >> 9;
1276 int i;
1277
1278 if(req->length > (sizeof(req->sector_mask) * 8) << 9)
1279 panic("Operation too long");
1280
1281 if(req->op == UBD_READ) {
1282 for(i = 0; i < req->length >> 9; i++){
1283 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1284 ubd_set_bit(i, (unsigned char *)
1285 &req->sector_mask);
1286 }
1287 }
1288 else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1289 &req->cow_offset, bitmap, bitmap_offset,
1290 req->bitmap_words, bitmap_len);
1291}
1292
1293/* Called with dev->lock held */
1294static void prepare_request(struct request *req, struct io_thread_req *io_req,
1295 unsigned long long offset, int page_offset,
1296 int len, struct page *page)
1297{
1298 struct gendisk *disk = req->rq_disk;
1299 struct ubd *ubd_dev = disk->private_data;
1300
1301 io_req->req = req;
1302 io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1303 ubd_dev->fd;
1304 io_req->fds[1] = ubd_dev->fd;
1305 io_req->cow_offset = -1;
1306 io_req->offset = offset;
1307 io_req->length = len;
1308 io_req->error = 0;
1309 io_req->sector_mask = 0;
1310
1311 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1312 io_req->offsets[0] = 0;
1313 io_req->offsets[1] = ubd_dev->cow.data_offset;
1314 io_req->buffer = page_address(page) + page_offset;
1315 io_req->sectorsize = 1 << 9;
1316
1317 if(ubd_dev->cow.file != NULL)
1318 cowify_req(io_req, ubd_dev->cow.bitmap,
1319 ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len);
1320
1321}
1322
1323/* Called with dev->lock held */
1324static void prepare_flush_request(struct request *req,
1325 struct io_thread_req *io_req)
1326{
1327 struct gendisk *disk = req->rq_disk;
1328 struct ubd *ubd_dev = disk->private_data;
1329
1330 io_req->req = req;
1331 io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1332 ubd_dev->fd;
1333 io_req->op = UBD_FLUSH;
1334}
1335
1336static bool submit_request(struct io_thread_req *io_req, struct ubd *dev)
1337{
1338 int n = os_write_file(thread_fd, &io_req,
1339 sizeof(io_req));
1340 if (n != sizeof(io_req)) {
1341 if (n != -EAGAIN)
1342 printk("write to io thread failed, "
1343 "errno = %d\n", -n);
1344 else if (list_empty(&dev->restart))
1345 list_add(&dev->restart, &restart);
1346
1347 kfree(io_req);
1348 return false;
1349 }
1350 return true;
1351}
1352
1353/* Called with dev->lock held */
1354static void do_ubd_request(struct request_queue *q)
1355{
1356 struct io_thread_req *io_req;
1357 struct request *req;
1358
1359 while(1){
1360 struct ubd *dev = q->queuedata;
1361 if(dev->request == NULL){
1362 struct request *req = blk_fetch_request(q);
1363 if(req == NULL)
1364 return;
1365
1366 dev->request = req;
1367 dev->rq_pos = blk_rq_pos(req);
1368 dev->start_sg = 0;
1369 dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
1370 }
1371
1372 req = dev->request;
1373
1374 if (req_op(req) == REQ_OP_FLUSH) {
1375 io_req = kmalloc(sizeof(struct io_thread_req),
1376 GFP_ATOMIC);
1377 if (io_req == NULL) {
1378 if (list_empty(&dev->restart))
1379 list_add(&dev->restart, &restart);
1380 return;
1381 }
1382 prepare_flush_request(req, io_req);
1383 if (submit_request(io_req, dev) == false)
1384 return;
1385 }
1386
1387 while(dev->start_sg < dev->end_sg){
1388 struct scatterlist *sg = &dev->sg[dev->start_sg];
1389
1390 io_req = kmalloc(sizeof(struct io_thread_req),
1391 GFP_ATOMIC);
1392 if(io_req == NULL){
1393 if(list_empty(&dev->restart))
1394 list_add(&dev->restart, &restart);
1395 return;
1396 }
1397 prepare_request(req, io_req,
1398 (unsigned long long)dev->rq_pos << 9,
1399 sg->offset, sg->length, sg_page(sg));
1400
1401 if (submit_request(io_req, dev) == false)
1402 return;
1403
1404 dev->rq_pos += sg->length >> 9;
1405 dev->start_sg++;
1406 }
1407 dev->end_sg = 0;
1408 dev->request = NULL;
1409 }
1410}
1411
1412static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1413{
1414 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1415
1416 geo->heads = 128;
1417 geo->sectors = 32;
1418 geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1419 return 0;
1420}
1421
1422static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1423 unsigned int cmd, unsigned long arg)
1424{
1425 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1426 u16 ubd_id[ATA_ID_WORDS];
1427
1428 switch (cmd) {
1429 struct cdrom_volctrl volume;
1430 case HDIO_GET_IDENTITY:
1431 memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1432 ubd_id[ATA_ID_CYLS] = ubd_dev->size / (128 * 32 * 512);
1433 ubd_id[ATA_ID_HEADS] = 128;
1434 ubd_id[ATA_ID_SECTORS] = 32;
1435 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1436 sizeof(ubd_id)))
1437 return -EFAULT;
1438 return 0;
1439
1440 case CDROMVOLREAD:
1441 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1442 return -EFAULT;
1443 volume.channel0 = 255;
1444 volume.channel1 = 255;
1445 volume.channel2 = 255;
1446 volume.channel3 = 255;
1447 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1448 return -EFAULT;
1449 return 0;
1450 }
1451 return -EINVAL;
1452}
1453
1454static int update_bitmap(struct io_thread_req *req)
1455{
1456 int n;
1457
1458 if(req->cow_offset == -1)
1459 return 0;
1460
1461 n = os_pwrite_file(req->fds[1], &req->bitmap_words,
1462 sizeof(req->bitmap_words), req->cow_offset);
1463 if(n != sizeof(req->bitmap_words)){
1464 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1465 req->fds[1]);
1466 return 1;
1467 }
1468
1469 return 0;
1470}
1471
1472static void do_io(struct io_thread_req *req)
1473{
1474 char *buf;
1475 unsigned long len;
1476 int n, nsectors, start, end, bit;
1477 __u64 off;
1478
1479 if (req->op == UBD_FLUSH) {
1480 /* fds[0] is always either the rw image or our cow file */
1481 n = os_sync_file(req->fds[0]);
1482 if (n != 0) {
1483 printk("do_io - sync failed err = %d "
1484 "fd = %d\n", -n, req->fds[0]);
1485 req->error = 1;
1486 }
1487 return;
1488 }
1489
1490 nsectors = req->length / req->sectorsize;
1491 start = 0;
1492 do {
1493 bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1494 end = start;
1495 while((end < nsectors) &&
1496 (ubd_test_bit(end, (unsigned char *)
1497 &req->sector_mask) == bit))
1498 end++;
1499
1500 off = req->offset + req->offsets[bit] +
1501 start * req->sectorsize;
1502 len = (end - start) * req->sectorsize;
1503 buf = &req->buffer[start * req->sectorsize];
1504
1505 if(req->op == UBD_READ){
1506 n = 0;
1507 do {
1508 buf = &buf[n];
1509 len -= n;
1510 n = os_pread_file(req->fds[bit], buf, len, off);
1511 if (n < 0) {
1512 printk("do_io - read failed, err = %d "
1513 "fd = %d\n", -n, req->fds[bit]);
1514 req->error = 1;
1515 return;
1516 }
1517 } while((n < len) && (n != 0));
1518 if (n < len) memset(&buf[n], 0, len - n);
1519 } else {
1520 n = os_pwrite_file(req->fds[bit], buf, len, off);
1521 if(n != len){
1522 printk("do_io - write failed err = %d "
1523 "fd = %d\n", -n, req->fds[bit]);
1524 req->error = 1;
1525 return;
1526 }
1527 }
1528
1529 start = end;
1530 } while(start < nsectors);
1531
1532 req->error = update_bitmap(req);
1533}
1534
/*
 * Set by start_io_thread.  No locking is needed: start_io_thread is only
 * called from ubd_init, which is an initcall and therefore serialized.
 */
int kernel_fd = -1;

/*
 * Number of requests handled so far.  Modified only by the io thread.
 * XXX: nothing consumes this value yet.
 */
static int io_count;
1542
1543int io_thread(void *arg)
1544{
1545 int n, count, written, res;
1546
1547 os_fix_helper_signals();
1548
1549 while(1){
1550 n = bulk_req_safe_read(
1551 kernel_fd,
1552 io_req_buffer,
1553 &io_remainder,
1554 &io_remainder_size,
1555 UBD_REQ_BUFFER_SIZE
1556 );
1557 if (n < 0) {
1558 if (n == -EAGAIN) {
1559 ubd_read_poll(-1);
1560 continue;
1561 } else {
1562 printk("io_thread - read failed, fd = %d, "
1563 "err = %d,"
1564 "reminder = %d\n",
1565 kernel_fd, -n, io_remainder_size);
1566 }
1567 }
1568
1569 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
1570 io_count++;
1571 do_io((*io_req_buffer)[count]);
1572 }
1573
1574 written = 0;
1575
1576 do {
1577 res = os_write_file(kernel_fd, ((char *) io_req_buffer) + written, n);
1578 if (res >= 0) {
1579 written += res;
1580 } else {
1581 if (res != -EAGAIN) {
1582 printk("io_thread - write failed, fd = %d, "
1583 "err = %d\n", kernel_fd, -n);
1584 }
1585 }
1586 if (written < n) {
1587 ubd_write_poll(-1);
1588 }
1589 } while (written < n);
1590 }
1591
1592 return 0;
1593}