// SPDX-License-Identifier: GPL-2.0-only
#include <linux/export.h>
#include <linux/bvec.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/splice.h>
#include <net/checksum.h>
#include <linux/scatterlist.h>

#define PIPE_PARANOIA /* for now */

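/*
 * The iterate_* helpers below each walk one segment type and evaluate
 * STEP once per contiguous chunk, with __v describing the current chunk.
 * iterate_all_kinds() dispatches on the iterator type and takes three
 * step expressions: I for userspace iovecs, B for bio_vecs and K for
 * kernel kvecs (ITER_DISCARD iterators have nothing to step over).
 * iterate_and_advance() does the same but also consumes what was stepped
 * over, updating i->count, i->iov_offset and the segment cursor.
 */
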
#define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
	size_t left;					\
	size_t wanted = n;				\
	__p = i->iov;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	} else {					\
		left = 0;				\
	}						\
	while (unlikely(!left && n)) {			\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted - n;					\
}

#define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
	size_t wanted = n;				\
	__p = i->kvec;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		(void)(STEP);				\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	while (unlikely(n)) {				\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		(void)(STEP);				\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted;					\
}

#define iterate_bvec(i, n, __v, __bi, skip, STEP) {	\
	struct bvec_iter __start;			\
	__start.bi_size = n;				\
	__start.bi_bvec_done = skip;			\
	__start.bi_idx = 0;				\
	for_each_bvec(__v, i->bvec, __bi, __start) {	\
		if (!__v.bv_len)			\
			continue;			\
		(void)(STEP);				\
	}						\
}

#define iterate_all_kinds(i, n, v, I, B, K) {			\
	if (likely(n)) {					\
		size_t skip = i->iov_offset;			\
		if (unlikely(i->type & ITER_BVEC)) {		\
			struct bio_vec v;			\
			struct bvec_iter __bi;			\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;		\
			struct kvec v;				\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
		} else if (unlikely(i->type & ITER_DISCARD)) {	\
		} else {					\
			const struct iovec *iov;		\
			struct iovec v;				\
			iterate_iovec(i, n, v, iov, skip, (I))	\
		}						\
	}							\
}

#define iterate_and_advance(i, n, v, I, B, K) {			\
	if (unlikely(i->count < n))				\
		n = i->count;					\
	if (i->count) {						\
		size_t skip = i->iov_offset;			\
		if (unlikely(i->type & ITER_BVEC)) {		\
			const struct bio_vec *bvec = i->bvec;	\
			struct bio_vec v;			\
			struct bvec_iter __bi;			\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
			i->bvec = __bvec_iter_bvec(i->bvec, __bi);	\
			i->nr_segs -= i->bvec - bvec;		\
			skip = __bi.bi_bvec_done;		\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;		\
			struct kvec v;				\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
			if (skip == kvec->iov_len) {		\
				kvec++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= kvec - i->kvec;		\
			i->kvec = kvec;				\
		} else if (unlikely(i->type & ITER_DISCARD)) {	\
			skip += n;				\
		} else {					\
			const struct iovec *iov;		\
			struct iovec v;				\
			iterate_iovec(i, n, v, iov, skip, (I))	\
			if (skip == iov->iov_len) {		\
				iov++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= iov - i->iov;		\
			i->iov = iov;				\
		}						\
		i->count -= n;					\
		i->iov_offset = skip;				\
	}							\
}

static int copyout(void __user *to, const void *from, size_t n)
{
	if (access_ok(to, n)) {
		kasan_check_read(from, n);
		n = raw_copy_to_user(to, from, n);
	}
	return n;
}

static int copyin(void *to, const void __user *from, size_t n)
{
	if (access_ok(from, n)) {
		kasan_check_write(to, n);
		n = raw_copy_from_user(to, from, n);
	}
	return n;
}

static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *from;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	might_fault();
	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
		kaddr = kmap_atomic(page);
		from = kaddr + offset;

		/* first chunk, usually the only one */
		left = copyout(buf, from, copy);
		copy -= left;
		skip += copy;
		from += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = copyout(buf, from, copy);
			copy -= left;
			skip = copy;
			from += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = from - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	from = kaddr + offset;
	left = copyout(buf, from, copy);
	copy -= left;
	skip += copy;
	from += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = copyout(buf, from, copy);
		copy -= left;
		skip = copy;
		from += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *to;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	might_fault();
	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
		kaddr = kmap_atomic(page);
		to = kaddr + offset;

		/* first chunk, usually the only one */
		left = copyin(to, buf, copy);
		copy -= left;
		skip += copy;
		to += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = copyin(to, buf, copy);
			copy -= left;
			skip = copy;
			to += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = to - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	to = kaddr + offset;
	left = copyin(to, buf, copy);
	copy -= left;
	skip += copy;
	to += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = copyin(to, buf, copy);
		copy -= left;
		skip = copy;
		to += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

#ifdef PIPE_PARANOIA
static bool sanity(const struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	int idx = i->idx;
	int next = pipe->curbuf + pipe->nrbufs;
	if (i->iov_offset) {
		struct pipe_buffer *p;
		if (unlikely(!pipe->nrbufs))
			goto Bad;	// pipe must be non-empty
		if (unlikely(idx != ((next - 1) & (pipe->buffers - 1))))
			goto Bad;	// must be at the last buffer...

		p = &pipe->bufs[idx];
		if (unlikely(p->offset + p->len != i->iov_offset))
			goto Bad;	// ... at the end of segment
	} else {
		if (idx != (next & (pipe->buffers - 1)))
			goto Bad;	// must be right after the last buffer
	}
	return true;
Bad:
	printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset);
	printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n",
			pipe->curbuf, pipe->nrbufs, pipe->buffers);
	for (idx = 0; idx < pipe->buffers; idx++)
		printk(KERN_ERR "[%p %p %d %d]\n",
			pipe->bufs[idx].ops,
			pipe->bufs[idx].page,
			pipe->bufs[idx].offset,
			pipe->bufs[idx].len);
	WARN_ON(1);
	return false;
}
#else
#define sanity(i) true
#endif

static inline int next_idx(int idx, struct pipe_inode_info *pipe)
{
	return (idx + 1) & (pipe->buffers - 1);
}

static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	struct pipe_buffer *buf;
	size_t off;
	int idx;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	if (!sanity(i))
		return 0;

	off = i->iov_offset;
	idx = i->idx;
	buf = &pipe->bufs[idx];
	if (off) {
		if (offset == off && buf->page == page) {
			/* merge with the last one */
			buf->len += bytes;
			i->iov_offset += bytes;
			goto out;
		}
		idx = next_idx(idx, pipe);
		buf = &pipe->bufs[idx];
	}
	if (idx == pipe->curbuf && pipe->nrbufs)
		return 0;
	pipe->nrbufs++;
	buf->ops = &page_cache_pipe_buf_ops;
	get_page(buf->page = page);
	buf->offset = offset;
	buf->len = bytes;
	i->iov_offset = offset + bytes;
	i->idx = idx;
out:
	i->count -= bytes;
	return bytes;
}

/*
 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
 * bytes. For each iovec, fault in each page that constitutes the iovec.
 *
 * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
 * because it is an invalid address).
 */
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
{
	size_t skip = i->iov_offset;
	const struct iovec *iov;
	int err;
	struct iovec v;

	if (iter_is_iovec(i)) {
		iterate_iovec(i, bytes, v, iov, skip, ({
			err = fault_in_pages_readable(v.iov_base, v.iov_len);
			if (unlikely(err))
				return err;
		0;}))
	}
	return 0;
}
EXPORT_SYMBOL(iov_iter_fault_in_readable);
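
/*
 * Illustrative use (a sketch, not part of this file): a buffered-write
 * style loop typically faults the user range in up front so that the
 * later atomic copy is unlikely to come up short.  Everything here
 * except the iov_iter helpers is hypothetical:
 *
 *	if (iov_iter_fault_in_readable(i, bytes))
 *		return -EFAULT;
 *	status = my_write_begin(mapping, pos, bytes, &page);
 *	copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
 */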
435
David Brazdil0f672f62019-12-10 10:32:29 +0000436void iov_iter_init(struct iov_iter *i, unsigned int direction,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000437 const struct iovec *iov, unsigned long nr_segs,
438 size_t count)
439{
David Brazdil0f672f62019-12-10 10:32:29 +0000440 WARN_ON(direction & ~(READ | WRITE));
441 direction &= READ | WRITE;
442
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000443 /* It will get better. Eventually... */
444 if (uaccess_kernel()) {
David Brazdil0f672f62019-12-10 10:32:29 +0000445 i->type = ITER_KVEC | direction;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000446 i->kvec = (struct kvec *)iov;
447 } else {
David Brazdil0f672f62019-12-10 10:32:29 +0000448 i->type = ITER_IOVEC | direction;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000449 i->iov = iov;
450 }
451 i->nr_segs = nr_segs;
452 i->iov_offset = 0;
453 i->count = count;
454}
455EXPORT_SYMBOL(iov_iter_init);
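
/*
 * Illustrative use (a sketch): wrapping a two-element userspace iovec as
 * the destination of a READ-direction transfer.  "uvec", "kbuf" and
 * "total" are hypothetical:
 *
 *	struct iov_iter iter;
 *	size_t n;
 *
 *	iov_iter_init(&iter, READ, uvec, 2, total);
 *	n = copy_to_iter(kbuf, total, &iter);
 */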

static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
{
	char *from = kmap_atomic(page);
	memcpy(to, from + offset, len);
	kunmap_atomic(from);
}

static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len)
{
	char *to = kmap_atomic(page);
	memcpy(to + offset, from, len);
	kunmap_atomic(to);
}

static void memzero_page(struct page *page, size_t offset, size_t len)
{
	char *addr = kmap_atomic(page);
	memset(addr + offset, 0, len);
	kunmap_atomic(addr);
}

static inline bool allocated(struct pipe_buffer *buf)
{
	return buf->ops == &default_pipe_buf_ops;
}

static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp)
{
	size_t off = i->iov_offset;
	int idx = i->idx;
	if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) {
		idx = next_idx(idx, i->pipe);
		off = 0;
	}
	*idxp = idx;
	*offp = off;
}

static size_t push_pipe(struct iov_iter *i, size_t size,
			int *idxp, size_t *offp)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t off;
	int idx;
	ssize_t left;

	if (unlikely(size > i->count))
		size = i->count;
	if (unlikely(!size))
		return 0;

	left = size;
	data_start(i, &idx, &off);
	*idxp = idx;
	*offp = off;
	if (off) {
		left -= PAGE_SIZE - off;
		if (left <= 0) {
			pipe->bufs[idx].len += size;
			return size;
		}
		pipe->bufs[idx].len = PAGE_SIZE;
		idx = next_idx(idx, pipe);
	}
	while (idx != pipe->curbuf || !pipe->nrbufs) {
		struct page *page = alloc_page(GFP_USER);
		if (!page)
			break;
		pipe->nrbufs++;
		pipe->bufs[idx].ops = &default_pipe_buf_ops;
		pipe->bufs[idx].page = page;
		pipe->bufs[idx].offset = 0;
		if (left <= PAGE_SIZE) {
			pipe->bufs[idx].len = left;
			return size;
		}
		pipe->bufs[idx].len = PAGE_SIZE;
		left -= PAGE_SIZE;
		idx = next_idx(idx, pipe);
	}
	return size - left;
}

static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t n, off;
	int idx;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &idx, &off);
	if (unlikely(!n))
		return 0;
	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk);
		i->idx = idx;
		i->iov_offset = off + chunk;
		n -= chunk;
		addr += chunk;
	}
	i->count -= bytes;
	return bytes;
}

static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
			      __wsum sum, size_t off)
{
	__wsum next = csum_partial_copy_nocheck(from, to, len, 0);
	return csum_block_add(sum, next, off);
}

static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
					 struct csum_state *csstate,
					 struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	__wsum sum = csstate->csum;
	size_t off = csstate->off;
	size_t n, r;
	int idx;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &idx, &r);
	if (unlikely(!n))
		return 0;
	for ( ; n; idx = next_idx(idx, pipe), r = 0) {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
		char *p = kmap_atomic(pipe->bufs[idx].page);
		sum = csum_and_memcpy(p + r, addr, chunk, sum, off);
		kunmap_atomic(p);
		i->idx = idx;
		i->iov_offset = r + chunk;
		n -= chunk;
		off += chunk;
		addr += chunk;
	}
	i->count -= bytes;
	csstate->csum = sum;
	csstate->off = off;
	return bytes;
}

size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	const char *from = addr;
	if (unlikely(iov_iter_is_pipe(i)))
		return copy_pipe_to_iter(addr, bytes, i);
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
		memcpy_to_page(v.bv_page, v.bv_offset,
			       (from += v.bv_len) - v.bv_len, v.bv_len),
		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_to_iter);

#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
static int copyout_mcsafe(void __user *to, const void *from, size_t n)
{
	if (access_ok(to, n)) {
		kasan_check_read(from, n);
		n = copy_to_user_mcsafe((__force void *) to, from, n);
	}
	return n;
}

static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
		const char *from, size_t len)
{
	unsigned long ret;
	char *to;

	to = kmap_atomic(page);
	ret = memcpy_mcsafe(to + offset, from, len);
	kunmap_atomic(to);

	return ret;
}

static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t n, off, xfer = 0;
	int idx;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &idx, &off);
	if (unlikely(!n))
		return 0;
	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		unsigned long rem;

		rem = memcpy_mcsafe_to_page(pipe->bufs[idx].page, off, addr,
				chunk);
		i->idx = idx;
		i->iov_offset = off + chunk - rem;
		xfer += chunk - rem;
		if (rem)
			break;
		n -= chunk;
		addr += chunk;
	}
	i->count -= xfer;
	return xfer;
}

/**
 * _copy_to_iter_mcsafe - copy to user with source-read error exception handling
 * @addr: source kernel address
 * @bytes: total transfer length
 * @i: destination iterator
 *
 * The pmem driver arranges for filesystem-dax to use this facility via
 * dax_copy_to_iter() for protecting read/write to persistent memory.
 * Unless / until an architecture can guarantee identical performance
 * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a
 * performance regression to switch more users to the mcsafe version.
 *
 * Otherwise, the main differences between this and the typical
 * _copy_to_iter() are:
 *
 * * Typical tail/residue handling after a fault retries the copy
 *   byte-by-byte until the fault happens again. Re-triggering machine
 *   checks is potentially fatal so the implementation uses source
 *   alignment and poison alignment assumptions to avoid re-triggering
 *   hardware exceptions.
 *
 * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
 *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
 *   a short copy.
 *
 * See MCSAFE_TEST for self-test.
 */
size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
{
	const char *from = addr;
	unsigned long rem, curr_addr, s_addr = (unsigned long) addr;

	if (unlikely(iov_iter_is_pipe(i)))
		return copy_pipe_to_iter_mcsafe(addr, bytes, i);
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
		({
		rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
				(from += v.bv_len) - v.bv_len, v.bv_len);
		if (rem) {
			curr_addr = (unsigned long) from;
			bytes = curr_addr - s_addr - rem;
			return bytes;
		}
		}),
		({
		rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
				v.iov_len);
		if (rem) {
			curr_addr = (unsigned long) from;
			bytes = curr_addr - s_addr - rem;
			return bytes;
		}
		})
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */
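
/*
 * Illustrative use (a sketch of the dax hook the comment above refers
 * to; the wrapper shown here is hypothetical):
 *
 *	static size_t pmem_copy_to_iter(struct dax_device *dax_dev,
 *			pgoff_t pgoff, void *addr, size_t bytes,
 *			struct iov_iter *i)
 *	{
 *		return _copy_to_iter_mcsafe(addr, bytes, i);
 *	}
 *
 * A short return signals that the source hit poisoned memory; callers
 * must cope with partial progress instead of retrying byte-by-byte.
 */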

size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter);

bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;

	if (iter_is_iovec(i))
		might_fault();
	iterate_all_kinds(i, bytes, v, ({
		if (copyin((to += v.iov_len) - v.iov_len,
				      v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(_copy_from_iter_full);

size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
					 v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter_nocache);

#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
 * _copy_from_iter_flushcache - write destination through cpu cache
 * @addr: destination kernel address
 * @bytes: total transfer length
 * @i: source iterator
 *
 * The pmem driver arranges for filesystem-dax to use this facility via
 * dax_copy_from_iter() for ensuring that writes to persistent memory
 * are flushed through the CPU cache. It is differentiated from
 * _copy_from_iter_nocache() in that it guarantees all data is flushed for
 * all iterator types. _copy_from_iter_nocache() only attempts to
 * bypass the cache for the ITER_IOVEC case, and on some archs may use
 * instructions that strand dirty-data in the cache.
 */
size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
					 v.iov_base, v.iov_len),
		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
			v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
#endif
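
/*
 * Illustrative use (a sketch of the dax hook described above; the
 * wrapper shown here is hypothetical):
 *
 *	static size_t pmem_copy_from_iter(struct dax_device *dax_dev,
 *			pgoff_t pgoff, void *addr, size_t bytes,
 *			struct iov_iter *i)
 *	{
 *		return _copy_from_iter_flushcache(addr, bytes, i);
 *	}
 *
 * Every byte that lands in persistent memory is written through (or
 * flushed from) the CPU cache, so the data is durable once the caller
 * issues its final fence.
 */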

bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
					     v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(_copy_from_iter_full_nocache);

static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
{
	struct page *head;
	size_t v = n + offset;

	/*
	 * The general case needs to access the page order in order
	 * to compute the page size.
	 * However, we mostly deal with order-0 pages and thus can
	 * avoid a possible cache line miss for requests that fit all
	 * page orders.
	 */
	if (n <= v && v <= PAGE_SIZE)
		return true;

	head = compound_head(page);
	v += (page - head) << PAGE_SHIFT;

	if (likely(n <= v && v <= (page_size(head))))
		return true;
	WARN_ON(1);
	return false;
}

size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(!page_copy_sane(page, offset, bytes)))
		return 0;
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else if (unlikely(iov_iter_is_discard(i))) {
		if (unlikely(i->count < bytes))
			bytes = i->count;
		i->count -= bytes;
		return bytes;
	} else if (likely(!iov_iter_is_pipe(i)))
		return copy_page_to_iter_iovec(page, offset, bytes, i);
	else
		return copy_page_to_iter_pipe(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_to_iter);

size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(!page_copy_sane(page, offset, bytes)))
		return 0;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return 0;
	}
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else
		return copy_page_from_iter_iovec(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_from_iter);

static size_t pipe_zero(size_t bytes, struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t n, off;
	int idx;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &idx, &off);
	if (unlikely(!n))
		return 0;

	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memzero_page(pipe->bufs[idx].page, off, chunk);
		i->idx = idx;
		i->iov_offset = off + chunk;
		n -= chunk;
	}
	i->count -= bytes;
	return bytes;
}

size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_zero(bytes, i);
	iterate_and_advance(i, bytes, v,
		clear_user(v.iov_base, v.iov_len),
		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
		memset(v.iov_base, 0, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(iov_iter_zero);

size_t iov_iter_copy_from_user_atomic(struct page *page,
		struct iov_iter *i, unsigned long offset, size_t bytes)
{
	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
	if (unlikely(!page_copy_sane(page, offset, bytes))) {
		kunmap_atomic(kaddr);
		return 0;
	}
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		kunmap_atomic(kaddr);
		WARN_ON(1);
		return 0;
	}
	iterate_all_kinds(i, bytes, v,
		copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)
	kunmap_atomic(kaddr);
	return bytes;
}
EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);

static inline void pipe_truncate(struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	if (pipe->nrbufs) {
		size_t off = i->iov_offset;
		int idx = i->idx;
		int nrbufs = (idx - pipe->curbuf) & (pipe->buffers - 1);
		if (off) {
			pipe->bufs[idx].len = off - pipe->bufs[idx].offset;
			idx = next_idx(idx, pipe);
			nrbufs++;
		}
		while (pipe->nrbufs > nrbufs) {
			pipe_buf_release(pipe, &pipe->bufs[idx]);
			idx = next_idx(idx, pipe);
			pipe->nrbufs--;
		}
	}
}

static void pipe_advance(struct iov_iter *i, size_t size)
{
	struct pipe_inode_info *pipe = i->pipe;
	if (unlikely(i->count < size))
		size = i->count;
	if (size) {
		struct pipe_buffer *buf;
		size_t off = i->iov_offset, left = size;
		int idx = i->idx;
		if (off) /* make it relative to the beginning of buffer */
			left += off - pipe->bufs[idx].offset;
		while (1) {
			buf = &pipe->bufs[idx];
			if (left <= buf->len)
				break;
			left -= buf->len;
			idx = next_idx(idx, pipe);
		}
		i->idx = idx;
		i->iov_offset = buf->offset + left;
	}
	i->count -= size;
	/* ... and discard everything past that point */
	pipe_truncate(i);
}

void iov_iter_advance(struct iov_iter *i, size_t size)
{
	if (unlikely(iov_iter_is_pipe(i))) {
		pipe_advance(i, size);
		return;
	}
	if (unlikely(iov_iter_is_discard(i))) {
		i->count -= size;
		return;
	}
	iterate_and_advance(i, size, v, 0, 0, 0)
}
EXPORT_SYMBOL(iov_iter_advance);

void iov_iter_revert(struct iov_iter *i, size_t unroll)
{
	if (!unroll)
		return;
	if (WARN_ON(unroll > MAX_RW_COUNT))
		return;
	i->count += unroll;
	if (unlikely(iov_iter_is_pipe(i))) {
		struct pipe_inode_info *pipe = i->pipe;
		int idx = i->idx;
		size_t off = i->iov_offset;
		while (1) {
			size_t n = off - pipe->bufs[idx].offset;
			if (unroll < n) {
				off -= unroll;
				break;
			}
			unroll -= n;
			if (!unroll && idx == i->start_idx) {
				off = 0;
				break;
			}
			if (!idx--)
				idx = pipe->buffers - 1;
			off = pipe->bufs[idx].offset + pipe->bufs[idx].len;
		}
		i->iov_offset = off;
		i->idx = idx;
		pipe_truncate(i);
		return;
	}
	if (unlikely(iov_iter_is_discard(i)))
		return;
	if (unroll <= i->iov_offset) {
		i->iov_offset -= unroll;
		return;
	}
	unroll -= i->iov_offset;
	if (iov_iter_is_bvec(i)) {
		const struct bio_vec *bvec = i->bvec;
		while (1) {
			size_t n = (--bvec)->bv_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->bvec = bvec;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	} else { /* same logics for iovec and kvec */
		const struct iovec *iov = i->iov;
		while (1) {
			size_t n = (--iov)->iov_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->iov = iov;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	}
}
EXPORT_SYMBOL(iov_iter_revert);

/*
 * Return the count of just the current iov_iter segment.
 */
size_t iov_iter_single_seg_count(const struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i)))
		return i->count;	// it is a silly place, anyway
	if (i->nr_segs == 1)
		return i->count;
	if (unlikely(iov_iter_is_discard(i)))
		return i->count;
	else if (iov_iter_is_bvec(i))
		return min(i->count, i->bvec->bv_len - i->iov_offset);
	else
		return min(i->count, i->iov->iov_len - i->iov_offset);
}
EXPORT_SYMBOL(iov_iter_single_seg_count);

void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
			const struct kvec *kvec, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	i->type = ITER_KVEC | (direction & (READ | WRITE));
	i->kvec = kvec;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_kvec);

void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
			const struct bio_vec *bvec, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	i->type = ITER_BVEC | (direction & (READ | WRITE));
	i->bvec = bvec;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_bvec);

void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
			struct pipe_inode_info *pipe,
			size_t count)
{
	BUG_ON(direction != READ);
	WARN_ON(pipe->nrbufs == pipe->buffers);
	i->type = ITER_PIPE | READ;
	i->pipe = pipe;
	i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
	i->iov_offset = 0;
	i->count = count;
	i->start_idx = i->idx;
}
EXPORT_SYMBOL(iov_iter_pipe);

/**
 * iov_iter_discard - Initialise an I/O iterator that discards data
 * @i: The iterator to initialise.
 * @direction: The direction of the transfer.
 * @count: The size of the I/O buffer in bytes.
 *
 * Set up an I/O iterator that just discards everything that's written to it.
 * It's only available as a READ iterator.
 */
void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
{
	BUG_ON(direction != READ);
	i->type = ITER_DISCARD | READ;
	i->count = count;
	i->iov_offset = 0;
}
EXPORT_SYMBOL(iov_iter_discard);
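
/*
 * Illustrative use (a sketch): consuming "len" bytes of a stream that
 * nobody wants to look at by reading into a discard iterator.  "file"
 * and "pos" are hypothetical, and the file's ->read_iter() must cope
 * with an ITER_DISCARD destination:
 *
 *	struct iov_iter iter;
 *	struct kiocb kiocb;
 *
 *	init_sync_kiocb(&kiocb, file);
 *	kiocb.ki_pos = pos;
 *	iov_iter_discard(&iter, READ, len);
 *	ret = file->f_op->read_iter(&kiocb, &iter);
 */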

unsigned long iov_iter_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (unlikely(iov_iter_is_pipe(i))) {
		if (size && i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
			return size | i->iov_offset;
		return size;
	}
	iterate_all_kinds(i, size, v,
		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
		res |= v.bv_offset | v.bv_len,
		res |= (unsigned long)v.iov_base | v.iov_len
	)
	return res;
}
EXPORT_SYMBOL(iov_iter_alignment);

unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return ~0U;
	}

	iterate_all_kinds(i, size, v,
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0), 0),
		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
			(size != v.bv_len ? size : 0)),
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0))
		);
	return res;
}
EXPORT_SYMBOL(iov_iter_gap_alignment);

static inline ssize_t __pipe_get_pages(struct iov_iter *i,
				size_t maxsize,
				struct page **pages,
				int idx,
				size_t *start)
{
	struct pipe_inode_info *pipe = i->pipe;
	ssize_t n = push_pipe(i, maxsize, &idx, start);
	if (!n)
		return -EFAULT;

	maxsize = n;
	n += *start;
	while (n > 0) {
		get_page(*pages++ = pipe->bufs[idx].page);
		idx = next_idx(idx, pipe);
		n -= PAGE_SIZE;
	}

	return maxsize;
}

static ssize_t pipe_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	unsigned npages;
	size_t capacity;
	int idx;

	if (!maxsize)
		return 0;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &idx, start);
	/* some of this one + all after this one */
	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
	capacity = min(npages, maxpages) * PAGE_SIZE - *start;

	return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start);
}

ssize_t iov_iter_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	if (maxsize > i->count)
		maxsize = i->count;

	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_get_pages(i, pages, maxsize, maxpages, start);
	if (unlikely(iov_iter_is_discard(i)))
		return -EFAULT;

	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		if (len > maxpages * PAGE_SIZE)
			len = maxpages * PAGE_SIZE;
		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		res = get_user_pages_fast(addr, n,
				iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0,
				pages);
		if (unlikely(res < 0))
			return res;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		get_page(*pages = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages);

static struct page **get_pages_array(size_t n)
{
	return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
}

static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;
	ssize_t n;
	int idx;
	int npages;

	if (!maxsize)
		return 0;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &idx, start);
	/* some of this one + all after this one */
	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
	n = npages * PAGE_SIZE - *start;
	if (maxsize > n)
		maxsize = n;
	else
		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
	p = get_pages_array(npages);
	if (!p)
		return -ENOMEM;
	n = __pipe_get_pages(i, maxsize, p, idx, start);
	if (n > 0)
		*pages = p;
	else
		kvfree(p);
	return n;
}

ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;

	if (maxsize > i->count)
		maxsize = i->count;

	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_get_pages_alloc(i, pages, maxsize, start);
	if (unlikely(iov_iter_is_discard(i)))
		return -EFAULT;

	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		p = get_pages_array(n);
		if (!p)
			return -ENOMEM;
		res = get_user_pages_fast(addr, n,
				iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, p);
		if (unlikely(res < 0)) {
			kvfree(p);
			return res;
		}
		*pages = p;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		*pages = p = get_pages_array(1);
		if (!p)
			return -ENOMEM;
		get_page(*p = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages_alloc);

size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
			       struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len, 0, &err);
		if (!err) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		err ? v.iov_len : 0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
				      p + v.bv_offset, v.bv_len,
				      sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	}),({
		sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
				      v.iov_base, v.iov_len,
				      sum, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_from_iter);

bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
			       struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len, 0, &err);
		if (err)
			return false;
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
		0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
				      p + v.bv_offset, v.bv_len,
				      sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	}),({
		sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
				      v.iov_base, v.iov_len,
				      sum, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(csum_and_copy_from_iter_full);

size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
			     struct iov_iter *i)
{
	struct csum_state *csstate = _csstate;
	const char *from = addr;
	__wsum sum, next;
	size_t off;

	if (unlikely(iov_iter_is_pipe(i)))
		return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i);

	sum = csstate->csum;
	off = csstate->off;
	if (unlikely(iov_iter_is_discard(i))) {
		WARN_ON(1);	/* for now */
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
					     v.iov_base,
					     v.iov_len, 0, &err);
		if (!err) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		err ? v.iov_len : 0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy(p + v.bv_offset,
				      (from += v.bv_len) - v.bv_len,
				      v.bv_len, sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	}),({
		sum = csum_and_memcpy(v.iov_base,
				     (from += v.iov_len) - v.iov_len,
				     v.iov_len, sum, off);
		off += v.iov_len;
	})
	)
	csstate->csum = sum;
	csstate->off = off;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_to_iter);

size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
		struct iov_iter *i)
{
#ifdef CONFIG_CRYPTO
	struct ahash_request *hash = hashp;
	struct scatterlist sg;
	size_t copied;

	copied = copy_to_iter(addr, bytes, i);
	sg_init_one(&sg, addr, copied);
	ahash_request_set_crypt(hash, &sg, NULL, copied);
	crypto_ahash_update(hash);
	return copied;
#else
	return 0;
#endif
}
EXPORT_SYMBOL(hash_and_copy_to_iter);

int iov_iter_npages(const struct iov_iter *i, int maxpages)
{
	size_t size = i->count;
	int npages = 0;

	if (!size)
		return 0;
	if (unlikely(iov_iter_is_discard(i)))
		return 0;

	if (unlikely(iov_iter_is_pipe(i))) {
		struct pipe_inode_info *pipe = i->pipe;
		size_t off;
		int idx;

		if (!sanity(i))
			return 0;

		data_start(i, &idx, &off);
		/* some of this one + all after this one */
		npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1;
		if (npages >= maxpages)
			return maxpages;
	} else iterate_all_kinds(i, size, v, ({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	0;}),({
		npages++;
		if (npages >= maxpages)
			return maxpages;
	}),({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	})
	)
	return npages;
}
EXPORT_SYMBOL(iov_iter_npages);

const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
{
	*new = *old;
	if (unlikely(iov_iter_is_pipe(new))) {
		WARN_ON(1);
		return NULL;
	}
	if (unlikely(iov_iter_is_discard(new)))
		return NULL;
	if (iov_iter_is_bvec(new))
		return new->bvec = kmemdup(new->bvec,
				    new->nr_segs * sizeof(struct bio_vec),
				    flags);
	else
		/* iovec and kvec have identical layout */
		return new->iov = kmemdup(new->iov,
				   new->nr_segs * sizeof(struct iovec),
				   flags);
}
EXPORT_SYMBOL(dup_iter);

/**
 * import_iovec() - Copy an array of &struct iovec from userspace
 *     into the kernel, check that it is valid, and initialize a new
 *     &struct iov_iter iterator to access it.
 *
 * @type: One of %READ or %WRITE.
 * @uvector: Pointer to the userspace array.
 * @nr_segs: Number of elements in userspace array.
 * @fast_segs: Number of elements in @iov.
 * @iov: (input and output parameter) Pointer to pointer to (usually small
 *     on-stack) kernel array.
 * @i: Pointer to iterator that will be initialized on success.
 *
 * If the array pointed to by *@iov is large enough to hold all @nr_segs,
 * then this function places %NULL in *@iov on return. Otherwise, a new
 * array will be allocated and the result placed in *@iov. This means that
 * the caller may call kfree() on *@iov regardless of whether the small
 * on-stack array was used or not (and regardless of whether this function
 * returns an error or not).
 *
 * Return: Negative error code on error, bytes imported on success.
 */
ssize_t import_iovec(int type, const struct iovec __user * uvector,
		 unsigned nr_segs, unsigned fast_segs,
		 struct iovec **iov, struct iov_iter *i)
{
	ssize_t n;
	struct iovec *p;
	n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
				  *iov, &p);
	if (n < 0) {
		if (p != *iov)
			kfree(p);
		*iov = NULL;
		return n;
	}
	iov_iter_init(i, type, p, nr_segs, n);
	*iov = p == *iov ? NULL : p;
	return n;
}
EXPORT_SYMBOL(import_iovec);
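
/*
 * Illustrative use (a sketch of the usual syscall pattern, e.g. a
 * readv-style path; do_the_io() is hypothetical):
 *
 *	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
 *	struct iov_iter iter;
 *	ssize_t ret;
 *
 *	ret = import_iovec(READ, uvector, nr_segs, UIO_FASTIOV,
 *			   &iov, &iter);
 *	if (ret < 0)
 *		return ret;
 *	ret = do_the_io(&iter);
 *	kfree(iov);	// safe whether or not iovstack was used
 *	return ret;
 */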

#ifdef CONFIG_COMPAT
#include <linux/compat.h>

ssize_t compat_import_iovec(int type,
		 const struct compat_iovec __user * uvector,
		 unsigned nr_segs, unsigned fast_segs,
		 struct iovec **iov, struct iov_iter *i)
{
	ssize_t n;
	struct iovec *p;
	n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
					 *iov, &p);
	if (n < 0) {
		if (p != *iov)
			kfree(p);
		*iov = NULL;
		return n;
	}
	iov_iter_init(i, type, p, nr_segs, n);
	*iov = p == *iov ? NULL : p;
	return n;
}
#endif

int import_single_range(int rw, void __user *buf, size_t len,
		 struct iovec *iov, struct iov_iter *i)
{
	if (len > MAX_RW_COUNT)
		len = MAX_RW_COUNT;
	if (unlikely(!access_ok(buf, len)))
		return -EFAULT;

	iov->iov_base = buf;
	iov->iov_len = len;
	iov_iter_init(i, rw, iov, 1, len);
	return 0;
}
EXPORT_SYMBOL(import_single_range);

int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
			    int (*f)(struct kvec *vec, void *context),
			    void *context)
{
	struct kvec w;
	int err = -EINVAL;
	if (!bytes)
		return 0;

	iterate_all_kinds(i, bytes, v, -EINVAL, ({
		w.iov_base = kmap(v.bv_page) + v.bv_offset;
		w.iov_len = v.bv_len;
		err = f(&w, context);
		kunmap(v.bv_page);
		err;}), ({
		w = v;
		err = f(&w, context);})
	)
	return err;
}
EXPORT_SYMBOL(iov_iter_for_each_range);