/*
 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>

/*
 * Checksum copy with exception handling.
 * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
 * destination is zeroed.
 *
 * Input
 * rdi  source
 * rsi  destination
 * edx  len (32bit)
 * ecx  sum (32bit)
 * r8   src_err_ptr (int)
 * r9   dst_err_ptr (int)
 *
 * Output
 * eax  32bit sum, undefined in case of exception.
 *
 * Wrappers need to take care of returning a valid sum and of zeroing the
 * destination on exceptions. They should also align the source or
 * destination to 8 bytes.
 */
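
/*
 * For readers: given the SysV AMD64 calling convention and the register
 * assignments listed above, this routine roughly corresponds to a C
 * prototype along the following lines (types assumed here; the C
 * declaration elsewhere in the tree is authoritative):
 *
 *	__wsum csum_partial_copy_generic(const void *src, void *dst,
 *					 int len, __wsum sum,
 *					 int *src_err_ptr, int *dst_err_ptr);
 */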

.macro source
10:
	_ASM_EXTABLE_UA(10b, .Lbad_source)
.endm

.macro dest
20:
	_ASM_EXTABLE_UA(20b, .Lbad_dest)
.endm

/*
 * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
 * potentially unmapped kernel address.
 */
.macro ignore L=.Lignore
30:
	_ASM_EXTABLE(30b, \L)
.endm
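
/*
 * Reading aid (no code emitted here): _ASM_EXTABLE()/_ASM_EXTABLE_UA() only
 * emit an entry into the exception table section, not into .text, so a
 * sequence such as
 *
 *	source
 *	movq (%rdi), %rbx
 *
 * effectively places local label 10: on the load itself and registers a
 * fixup that branches to .Lbad_source if that load faults.
 */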


ENTRY(csum_partial_copy_generic)
	cmpl $3*64, %edx
	jle .Lignore

.Lignore:
	subq $7*8, %rsp
	movq %rbx, 2*8(%rsp)
	movq %r12, 3*8(%rsp)
	movq %r14, 4*8(%rsp)
	movq %r13, 5*8(%rsp)
	movq %r15, 6*8(%rsp)

	movq %r8, (%rsp)
	movq %r9, 1*8(%rsp)
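
	/*
	 * Stack layout established above (derived from the stores, kept
	 * here as a reference for the exception handlers at the end):
	 *   0*8(%rsp)  src_err_ptr
	 *   1*8(%rsp)  dst_err_ptr
	 *   2*8..6*8   saved callee-saved registers
	 */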

	movl %ecx, %eax
	movl %edx, %ecx

	xorl %r9d, %r9d
	movq %rcx, %r12

	shrq $6, %r12
	jz .Lhandle_tail	/* < 64 */

	clc

	/* main loop. clear in 64 byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11: temp3, rdx: temp4, r12: loopcnt */
	/* r10: temp5, r15: temp6, r14: temp7, r13: temp8 */
	.p2align 4
.Lloop:
	source
	movq (%rdi), %rbx
	source
	movq 8(%rdi), %r8
	source
	movq 16(%rdi), %r11
	source
	movq 24(%rdi), %rdx

	source
	movq 32(%rdi), %r10
	source
	movq 40(%rdi), %r15
	source
	movq 48(%rdi), %r14
	source
	movq 56(%rdi), %r13

	ignore 2f
	prefetcht0 5*64(%rdi)
2:
	adcq %rbx, %rax
	adcq %r8, %rax
	adcq %r11, %rax
	adcq %rdx, %rax
	adcq %r10, %rax
	adcq %r15, %rax
	adcq %r14, %rax
	adcq %r13, %rax

	decl %r12d

	dest
	movq %rbx, (%rsi)
	dest
	movq %r8, 8(%rsi)
	dest
	movq %r11, 16(%rsi)
	dest
	movq %rdx, 24(%rsi)

	dest
	movq %r10, 32(%rsi)
	dest
	movq %r15, 40(%rsi)
	dest
	movq %r14, 48(%rsi)
	dest
	movq %r13, 56(%rsi)

3:

	leaq 64(%rdi), %rdi
	leaq 64(%rsi), %rsi

	jnz .Lloop

	adcq %r9, %rax	/* add in carry; %r9 is zero */

	/* do last up to 56 bytes */
.Lhandle_tail:
	/* ecx: count */
	movl %ecx, %r10d
	andl $63, %ecx
	shrl $3, %ecx
	jz .Lfold
	clc
	.p2align 4
.Lloop_8:
	source
	movq (%rdi), %rbx
	adcq %rbx, %rax
	decl %ecx
	dest
	movq %rbx, (%rsi)
	leaq 8(%rsi), %rsi	/* preserve carry */
	leaq 8(%rdi), %rdi
	jnz .Lloop_8
	adcq %r9, %rax	/* add in carry */

.Lfold:
	/* reduce checksum to 32bits */
	movl %eax, %ebx
	shrq $32, %rax
	addl %ebx, %eax
	adcl %r9d, %eax
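
	/*
	 * Reading aid: in rough C terms the fold above computes
	 *
	 *	sum = (u32)sum64 + (u32)(sum64 >> 32);	// movl/shrq/addl
	 *	sum += carry;				// adcl with %r9d == 0
	 *
	 * leaving a 32-bit partial checksum in %eax.
	 */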

	/* do last up to 6 bytes */
.Lhandle_7:
	movl %r10d, %ecx
	andl $7, %ecx
	shrl $1, %ecx
	jz .Lhandle_1
	movl $2, %edx
	xorl %ebx, %ebx
	clc
	.p2align 4
.Lloop_1:
	source
	movw (%rdi), %bx
	adcl %ebx, %eax
	decl %ecx
	dest
	movw %bx, (%rsi)
	leaq 2(%rdi), %rdi
	leaq 2(%rsi), %rsi
	jnz .Lloop_1
	adcl %r9d, %eax	/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testb $1, %r10b
	jz .Lende
	xorl %ebx, %ebx
	source
	movb (%rdi), %bl
	dest
	movb %bl, (%rsi)
	addl %ebx, %eax
	adcl %r9d, %eax	/* carry */

.Lende:
	movq 2*8(%rsp), %rbx
	movq 3*8(%rsp), %r12
	movq 4*8(%rsp), %r14
	movq 5*8(%rsp), %r13
	movq 6*8(%rsp), %r15
	addq $7*8, %rsp
	ret

	/* Exception handlers. Very simple, zeroing is done in the wrappers */
.Lbad_source:
	movq (%rsp), %rax
	testq %rax, %rax
	jz .Lende
	movl $-EFAULT, (%rax)
	jmp .Lende

.Lbad_dest:
	movq 8(%rsp), %rax
	testq %rax, %rax
	jz .Lende
	movl $-EFAULT, (%rax)
	jmp .Lende
ENDPROC(csum_partial_copy_generic)
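
/*
 * Minimal usage sketch, not the actual kernel wrapper: per the header
 * comment, the caller is expected to zero the destination and pick a
 * well-defined checksum when an error pointer was set to -EFAULT.
 * Assuming the C prototype sketched near the top of this file:
 *
 *	int src_err = 0, dst_err = 0;
 *	__wsum csum = csum_partial_copy_generic(src, dst, len, sum,
 *						&src_err, &dst_err);
 *	if (unlikely(src_err || dst_err)) {
 *		memset(dst, 0, len);	// zeroing is the wrapper's job
 *		csum = 0;		// any well-defined value
 *	}
 */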