blob: bd8bb1eca3795cf90c982afe016d104c52d386ce [file] [log] [blame]
Paul Bakker5121ce52009-01-03 21:22:43 +00001/**
2 * \file bn_mul.h
Paul Bakkere0ccd0a2009-01-04 16:27:10 +00003 *
Darryl Greena40a1012018-01-05 15:33:17 +00004 * \brief Multi-precision integer library
5 */
6/*
Bence Szépkúti1e148272020-08-07 13:07:28 +02007 * Copyright The Mbed TLS Contributors
Manuel Pégourié-Gonnard37ff1402015-09-04 14:21:07 +02008 * SPDX-License-Identifier: Apache-2.0
9 *
10 * Licensed under the Apache License, Version 2.0 (the "License"); you may
11 * not use this file except in compliance with the License.
12 * You may obtain a copy of the License at
13 *
14 * http://www.apache.org/licenses/LICENSE-2.0
15 *
16 * Unless required by applicable law or agreed to in writing, software
17 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
18 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 * See the License for the specific language governing permissions and
20 * limitations under the License.
Paul Bakker5121ce52009-01-03 21:22:43 +000021 */
/*
 *      Multiply source vector [s] with b, add result
 *       to destination vector [d] and set carry c.
 *
 *      Currently supports:
 *
 *      . IA-32 (386+)         . AMD64 / EM64T
 *      . IA-32 (SSE2)         . Motorola 68000
 *      . PowerPC, 32-bit      . MicroBlaze
 *      . PowerPC, 64-bit      . TriCore
 *      . SPARC v8 (disabled)  . ARM v3+
 *      . Alpha                . MIPS32
 *      . AArch64
 *      . C, longlong          . C, generic
 */
Manuel Pégourié-Gonnard2cf5a7c2015-04-08 12:49:31 +020036#ifndef MBEDTLS_BN_MUL_H
37#define MBEDTLS_BN_MUL_H
Paul Bakker5121ce52009-01-03 21:22:43 +000038
Bence Szépkútic662b362021-05-27 11:25:03 +020039#include "mbedtls/build_info.h"
Ron Eldor8b0cf2e2018-02-14 16:02:41 +020040
Jaeden Ameroc49fbbf2019-07-04 20:01:14 +010041#include "mbedtls/bignum.h"
Paul Bakker5121ce52009-01-03 21:22:43 +000042
/*
 * Conversion macros for embedded constants:
 * build lists of mbedtls_mpi_uint's from lists of unsigned char's grouped by 8,
 * 4 or 2.
 *
 * Arguments are given least-significant byte first (a is shifted by 0,
 * b by 8, and so on).
 *
 * Every expansion that yields a single limb is wrapped in parentheses so
 * that it behaves as one operand when combined with other operators
 * (e.g. `MACRO & 0xFF` or `MACRO >> 8`).  When MBEDTLS_HAVE_INT32 is defined,
 * MBEDTLS_BYTES_TO_T_UINT_8 expands to TWO comma-separated limbs (low limb
 * first) and is therefore only usable inside an initializer list.
 */
#if defined(MBEDTLS_HAVE_INT32)

/* One 32-bit limb from four bytes. */
# define MBEDTLS_BYTES_TO_T_UINT_4(a, b, c, d)                        \
    (((mbedtls_mpi_uint)(a) << 0) | ((mbedtls_mpi_uint)(b) << 8) |    \
     ((mbedtls_mpi_uint)(c) << 16) | ((mbedtls_mpi_uint)(d) << 24))

/* One 32-bit limb from two bytes; the upper two bytes are zero. */
# define MBEDTLS_BYTES_TO_T_UINT_2(a, b) \
    MBEDTLS_BYTES_TO_T_UINT_4(a, b, 0, 0)

/* Two 32-bit limbs (comma-separated, low limb first) from eight bytes. */
# define MBEDTLS_BYTES_TO_T_UINT_8(a, b, c, d, e, f, g, h) \
    MBEDTLS_BYTES_TO_T_UINT_4(a, b, c, d),                 \
    MBEDTLS_BYTES_TO_T_UINT_4(e, f, g, h)

#else /* 64-bits */

/* One 64-bit limb from eight bytes. */
# define MBEDTLS_BYTES_TO_T_UINT_8(a, b, c, d, e, f, g, h)            \
    (((mbedtls_mpi_uint)(a) << 0) | ((mbedtls_mpi_uint)(b) << 8) |    \
     ((mbedtls_mpi_uint)(c) << 16) | ((mbedtls_mpi_uint)(d) << 24) |  \
     ((mbedtls_mpi_uint)(e) << 32) | ((mbedtls_mpi_uint)(f) << 40) |  \
     ((mbedtls_mpi_uint)(g) << 48) | ((mbedtls_mpi_uint)(h) << 56))

/* One 64-bit limb from four bytes; the upper four bytes are zero. */
# define MBEDTLS_BYTES_TO_T_UINT_4(a, b, c, d) \
    MBEDTLS_BYTES_TO_T_UINT_8(a, b, c, d, 0, 0, 0, 0)

/* One 64-bit limb from two bytes; the upper six bytes are zero. */
# define MBEDTLS_BYTES_TO_T_UINT_2(a, b) \
    MBEDTLS_BYTES_TO_T_UINT_8(a, b, 0, 0, 0, 0, 0, 0)

#endif /* bits in mbedtls_mpi_uint */
76
Manuel Pégourié-Gonnard2cf5a7c2015-04-08 12:49:31 +020077#if defined(MBEDTLS_HAVE_ASM)
Paul Bakker5121ce52009-01-03 21:22:43 +000078
/* Map `asm` onto `__asm` unless `asm` is already defined as a macro. */
# ifndef asm
#  define asm __asm
# endif
Manuel Pégourié-Gonnardba194322015-05-29 09:47:57 +020082
Manuel Pégourié-Gonnard854dab92015-08-10 12:08:34 +020083/* armcc5 --gnu defines __GNUC__ but doesn't support GNU's extended asm */
Mateusz Starzykc0eabdc2021-08-03 14:09:02 +020084# if defined(__GNUC__) && \
85 (!defined(__ARMCC_VERSION) || __ARMCC_VERSION >= 6000000)
Simon Butcher4b9a3ad2018-07-10 20:18:29 +010086
/*
 * IA-32 (i386) implementation, with an optional SSE2 eight-limb fragment.
 *
 * Disable use of the i386 assembly code below if option -O0, to disable all
 * compiler optimisations, is passed, detected with __OPTIMIZE__
 * This is done as the number of registers used in the assembly code doesn't
 * work with the -O0 option.
 *
 * The fragments are pasted together by the user as
 *   MULADDC_INIT  (MULADDC_CORE | MULADDC_HUIT)...  MULADDC_STOP
 * to form ONE asm statement: INIT opens `asm(`, STOP supplies the operand
 * lists and closes it.  The expansion site must provide the variables
 * s, d, c, b and a scratch variable t.
 *
 * Operand numbering (defined in MULADDC_STOP):
 *   outputs: %0 = t, %1 = c, %2 = d, %3 = s
 *   inputs:  %4 = t, %5 = s, %6 = d, %7 = c, %8 = b
 */
# if defined(__i386__) && defined(__OPTIMIZE__)

/* Save %ebx into t, then load s -> %esi, d -> %edi, c -> %ecx, b -> %ebx. */
#  define MULADDC_INIT              \
    asm(                            \
        "movl %%ebx, %0 \n\t"       \
        "movl %5, %%esi \n\t"       \
        "movl %6, %%edi \n\t"       \
        "movl %7, %%ecx \n\t"       \
        "movl %8, %%ebx \n\t"

/*
 * One limb: load from (%esi), multiply by %ebx, add the carry in %ecx and
 * the word at (%edi), store the low word back and keep the high word in
 * %ecx as the new carry.
 */
#  define MULADDC_CORE              \
        "lodsl \n\t"                \
        "mull %%ebx \n\t"           \
        "addl %%ecx, %%eax \n\t"    \
        "adcl $0, %%edx \n\t"       \
        "addl (%%edi), %%eax \n\t"  \
        "adcl $0, %%edx \n\t"       \
        "movl %%edx, %%ecx \n\t"    \
        "stosl \n\t"

#  if defined(MBEDTLS_HAVE_SSE2)

/*
 * Eight limbs at once (byte offsets 0..28 from %esi / %edi, both pointers
 * then advanced by 32) using pmuludq/paddq on the MMX registers.  The carry
 * enters and leaves in %ecx.  The matching MULADDC_STOP issues `emms`.
 */
#   define MULADDC_HUIT                 \
        "movd %%ecx, %%mm1 \n\t"        \
        "movd %%ebx, %%mm0 \n\t"        \
        "movd (%%edi), %%mm3 \n\t"      \
        "paddq %%mm3, %%mm1 \n\t"       \
        "movd (%%esi), %%mm2 \n\t"      \
        "pmuludq %%mm0, %%mm2 \n\t"     \
        "movd 4(%%esi), %%mm4 \n\t"     \
        "pmuludq %%mm0, %%mm4 \n\t"     \
        "movd 8(%%esi), %%mm6 \n\t"     \
        "pmuludq %%mm0, %%mm6 \n\t"     \
        "movd 12(%%esi), %%mm7 \n\t"    \
        "pmuludq %%mm0, %%mm7 \n\t"     \
        "paddq %%mm2, %%mm1 \n\t"       \
        "movd 4(%%edi), %%mm3 \n\t"     \
        "paddq %%mm4, %%mm3 \n\t"       \
        "movd 8(%%edi), %%mm5 \n\t"     \
        "paddq %%mm6, %%mm5 \n\t"       \
        "movd 12(%%edi), %%mm4 \n\t"    \
        "paddq %%mm4, %%mm7 \n\t"       \
        "movd %%mm1, (%%edi) \n\t"      \
        "movd 16(%%esi), %%mm2 \n\t"    \
        "pmuludq %%mm0, %%mm2 \n\t"     \
        "psrlq $32, %%mm1 \n\t"         \
        "movd 20(%%esi), %%mm4 \n\t"    \
        "pmuludq %%mm0, %%mm4 \n\t"     \
        "paddq %%mm3, %%mm1 \n\t"       \
        "movd 24(%%esi), %%mm6 \n\t"    \
        "pmuludq %%mm0, %%mm6 \n\t"     \
        "movd %%mm1, 4(%%edi) \n\t"     \
        "psrlq $32, %%mm1 \n\t"         \
        "movd 28(%%esi), %%mm3 \n\t"    \
        "pmuludq %%mm0, %%mm3 \n\t"     \
        "paddq %%mm5, %%mm1 \n\t"       \
        "movd 16(%%edi), %%mm5 \n\t"    \
        "paddq %%mm5, %%mm2 \n\t"       \
        "movd %%mm1, 8(%%edi) \n\t"     \
        "psrlq $32, %%mm1 \n\t"         \
        "paddq %%mm7, %%mm1 \n\t"       \
        "movd 20(%%edi), %%mm5 \n\t"    \
        "paddq %%mm5, %%mm4 \n\t"       \
        "movd %%mm1, 12(%%edi) \n\t"    \
        "psrlq $32, %%mm1 \n\t"         \
        "paddq %%mm2, %%mm1 \n\t"       \
        "movd 24(%%edi), %%mm5 \n\t"    \
        "paddq %%mm5, %%mm6 \n\t"       \
        "movd %%mm1, 16(%%edi) \n\t"    \
        "psrlq $32, %%mm1 \n\t"         \
        "paddq %%mm4, %%mm1 \n\t"       \
        "movd 28(%%edi), %%mm5 \n\t"    \
        "paddq %%mm5, %%mm3 \n\t"       \
        "movd %%mm1, 20(%%edi) \n\t"    \
        "psrlq $32, %%mm1 \n\t"         \
        "paddq %%mm6, %%mm1 \n\t"       \
        "movd %%mm1, 24(%%edi) \n\t"    \
        "psrlq $32, %%mm1 \n\t"         \
        "paddq %%mm3, %%mm1 \n\t"       \
        "movd %%mm1, 28(%%edi) \n\t"    \
        "addl $32, %%edi \n\t"          \
        "addl $32, %%esi \n\t"          \
        "psrlq $32, %%mm1 \n\t"         \
        "movd %%mm1, %%ecx \n\t"

/*
 * Leave MMX state, restore %ebx from t and write c, d, s back.
 * "memory" is clobbered because the fragments store through d and load
 * through s, which the "m" operands (the pointer variables themselves)
 * do not describe.
 */
#   define MULADDC_STOP                                         \
        "emms \n\t"                                             \
        "movl %4, %%ebx \n\t"                                   \
        "movl %%ecx, %1 \n\t"                                   \
        "movl %%edi, %2 \n\t"                                   \
        "movl %%esi, %3 \n\t"                                   \
        : "=m" (t), "=m" (c), "=m" (d), "=m" (s)                \
        : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b)           \
        : "eax", "ebx", "ecx", "edx", "esi", "edi", "memory"    \
    );

#  else

/*
 * Restore %ebx from t and write c, d, s back.
 * "memory" is clobbered because the fragments store through d and load
 * through s, which the "m" operands (the pointer variables themselves)
 * do not describe.
 */
#   define MULADDC_STOP                                         \
        "movl %4, %%ebx \n\t"                                   \
        "movl %%ecx, %1 \n\t"                                   \
        "movl %%edi, %2 \n\t"                                   \
        "movl %%esi, %3 \n\t"                                   \
        : "=m" (t), "=m" (c), "=m" (d), "=m" (s)                \
        : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b)           \
        : "eax", "ebx", "ecx", "edx", "esi", "edi", "memory"    \
    );
#  endif /* SSE2 */
# endif /* i386 */
Paul Bakker5121ce52009-01-03 21:22:43 +0000202
/*
 * AMD64 / x86_64 implementation.
 *
 * The fragments are pasted together as MULADDC_INIT MULADDC_CORE...
 * MULADDC_STOP to form ONE asm statement.  Register assignment comes from
 * the constraints in MULADDC_STOP: c in %rcx, d in %rdi, s in %rsi (all
 * read-write) and b in %rbx (read-only).
 */
# if defined(__amd64__) || defined(__x86_64__)

/* Open the asm statement and zero %r8, which CORE copies into %rcx. */
#  define MULADDC_INIT              \
    asm(                            \
        "xorq %%r8, %%r8\n"

/*
 * One limb: %rdx:%rax = *s * b, add the incoming carry (%rcx), add the low
 * word into *d, leave the new carry in %rcx, and advance s and d by 8 bytes.
 */
#  define MULADDC_CORE              \
        "movq (%%rsi), %%rax\n"     \
        "mulq %%rbx\n"              \
        "addq $8, %%rsi\n"          \
        "addq %%rcx, %%rax\n"       \
        "movq %%r8, %%rcx\n"        \
        "adcq $0, %%rdx\n"          \
        "nop \n"                    \
        "addq %%rax, (%%rdi)\n"     \
        "adcq %%rdx, %%rcx\n"       \
        "addq $8, %%rdi\n"

/*
 * Operand lists and end of the asm statement.
 * "memory" is clobbered because CORE reads through s and read-modify-writes
 * through d; the register operands only describe the pointer values, so
 * without it the compiler may assume the pointed-to limbs are unchanged.
 */
#  define MULADDC_STOP                      \
        : "+c" (c), "+D" (d), "+S" (s)      \
        : "b" (b)                           \
        : "rax", "rdx", "r8", "memory"      \
    );

# endif /* AMD64 */
Paul Bakker5121ce52009-01-03 21:22:43 +0000228
/*
 * AArch64 implementation.
 *
 * The fragments are pasted together as MULADDC_INIT MULADDC_CORE...
 * MULADDC_STOP to form ONE asm statement.
 * Operands (defined in MULADDC_STOP): %0 = c, %1 = d, %2 = s (read-write
 * registers), %3 = b (read-only register).
 */
# if defined(__aarch64__)

/* Only opens the asm statement; there is no set-up code. */
#  define MULADDC_INIT \
    asm(

/*
 * One limb: x4 = *s (post-increment by 8), x5 = *d, x7:x6 = x4 * b,
 * x5 += x6 + c with the carries folded into the high word, c = high word,
 * *d = x5 (post-increment by 8).
 */
#  define MULADDC_CORE                  \
        "ldr x4, [%2], #8 \n\t"         \
        "ldr x5, [%1] \n\t"             \
        "mul x6, x4, %3 \n\t"           \
        "umulh x7, x4, %3 \n\t"         \
        "adds x5, x5, x6 \n\t"          \
        "adc x7, x7, xzr \n\t"          \
        "adds x5, x5, %0 \n\t"          \
        "adc %0, x7, xzr \n\t"          \
        "str x5, [%1], #8 \n\t"

/*
 * Operand lists and end of the asm statement.
 * "memory" is clobbered because CORE loads through s and d and stores
 * through d; the register operands only describe the pointer values, so
 * without it the compiler may assume the pointed-to limbs are unchanged.
 */
#  define MULADDC_STOP                              \
        : "+r" (c), "+r" (d), "+r" (s)              \
        : "r" (b)                                   \
        : "x4", "x5", "x6", "x7", "cc", "memory"    \
    );

# endif /* Aarch64 */
Ko-cc1871e2018-08-16 02:01:57 -0700252
/*
 * Motorola 68020 / CPU32 implementation.
 *
 * The fragments are pasted together as MULADDC_INIT (MULADDC_CORE |
 * MULADDC_HUIT)... MULADDC_STOP to form ONE asm statement.
 * Operands (defined in MULADDC_STOP):
 *   outputs: %0 = c, %1 = d, %2 = s
 *   inputs:  %3 = s, %4 = d, %5 = c, %6 = b
 */
# if defined(__mc68020__) || defined(__mcpu32__)

/* Load s -> %a2, d -> %a3, c -> %d3, b -> %d2 and zero %d0. */
#  define MULADDC_INIT              \
    asm(                            \
        "movl %3, %%a2 \n\t"        \
        "movl %4, %%a3 \n\t"        \
        "movl %5, %%d3 \n\t"        \
        "movl %6, %%d2 \n\t"        \
        "moveq #0, %%d0 \n\t"

/*
 * One limb: %d4:%d1 = *s++ * b, add the carry (%d3), add the low word into
 * *d++ and leave the new carry in %d3.
 */
#  define MULADDC_CORE                  \
        "movel %%a2@+, %%d1 \n\t"       \
        "mulul %%d2, %%d4:%%d1 \n\t"    \
        "addl %%d3, %%d1 \n\t"          \
        "addxl %%d0, %%d4 \n\t"         \
        "moveq #0, %%d3 \n\t"           \
        "addl %%d1, %%a3@+ \n\t"        \
        "addxl %%d4, %%d3 \n\t"

/*
 * Write c (%d3), d (%a3) and s (%a2) back and close the asm statement.
 * "memory" is clobbered because the fragments load through s and
 * read-modify-write through d, which the "m" operands (the pointer
 * variables themselves) do not describe.
 */
#  define MULADDC_STOP                                          \
        "movl %%d3, %0 \n\t"                                    \
        "movl %%a3, %1 \n\t"                                    \
        "movl %%a2, %2 \n\t"                                    \
        : "=m" (c), "=m" (d), "=m" (s)                          \
        : "m" (s), "m" (d), "m" (c), "m" (b)                    \
        : "d0", "d1", "d2", "d3", "d4", "a2", "a3", "memory"    \
    );

/*
 * Eight limbs unrolled, alternating %d4 and %d3 as the high-word/carry
 * register; the final carry is left in %d3.
 */
#  define MULADDC_HUIT                  \
        "movel %%a2@+, %%d1 \n\t"       \
        "mulul %%d2, %%d4:%%d1 \n\t"    \
        "addxl %%d3, %%d1 \n\t"         \
        "addxl %%d0, %%d4 \n\t"         \
        "addl %%d1, %%a3@+ \n\t"        \
        "movel %%a2@+, %%d1 \n\t"       \
        "mulul %%d2, %%d3:%%d1 \n\t"    \
        "addxl %%d4, %%d1 \n\t"         \
        "addxl %%d0, %%d3 \n\t"         \
        "addl %%d1, %%a3@+ \n\t"        \
        "movel %%a2@+, %%d1 \n\t"       \
        "mulul %%d2, %%d4:%%d1 \n\t"    \
        "addxl %%d3, %%d1 \n\t"         \
        "addxl %%d0, %%d4 \n\t"         \
        "addl %%d1, %%a3@+ \n\t"        \
        "movel %%a2@+, %%d1 \n\t"       \
        "mulul %%d2, %%d3:%%d1 \n\t"    \
        "addxl %%d4, %%d1 \n\t"         \
        "addxl %%d0, %%d3 \n\t"         \
        "addl %%d1, %%a3@+ \n\t"        \
        "movel %%a2@+, %%d1 \n\t"       \
        "mulul %%d2, %%d4:%%d1 \n\t"    \
        "addxl %%d3, %%d1 \n\t"         \
        "addxl %%d0, %%d4 \n\t"         \
        "addl %%d1, %%a3@+ \n\t"        \
        "movel %%a2@+, %%d1 \n\t"       \
        "mulul %%d2, %%d3:%%d1 \n\t"    \
        "addxl %%d4, %%d1 \n\t"         \
        "addxl %%d0, %%d3 \n\t"         \
        "addl %%d1, %%a3@+ \n\t"        \
        "movel %%a2@+, %%d1 \n\t"       \
        "mulul %%d2, %%d4:%%d1 \n\t"    \
        "addxl %%d3, %%d1 \n\t"         \
        "addxl %%d0, %%d4 \n\t"         \
        "addl %%d1, %%a3@+ \n\t"        \
        "movel %%a2@+, %%d1 \n\t"       \
        "mulul %%d2, %%d3:%%d1 \n\t"    \
        "addxl %%d4, %%d1 \n\t"         \
        "addxl %%d0, %%d3 \n\t"         \
        "addl %%d1, %%a3@+ \n\t"        \
        "addxl %%d0, %%d3 \n\t"

# endif /* MC68000 */
Paul Bakker5121ce52009-01-03 21:22:43 +0000325
/*
 * PowerPC implementation: 64-bit (ld/std, 8-byte limbs) and 32-bit
 * (lwz/stw, 4-byte limbs), each in two spellings: bare register names
 * (rN) when both __MACH__ and __APPLE__ are defined, %%rN otherwise.
 *
 * The fragments are pasted together as MULADDC_INIT MULADDC_CORE...
 * MULADDC_STOP to form ONE asm statement.
 * Operands (defined in each MULADDC_STOP):
 *   outputs: %0 = c, %1 = d, %2 = s
 *   inputs:  %3 = s, %4 = d, %5 = c, %6 = b
 * Register use: r3 = s, r4 = d, r5 = carry, r6 = b, r7-r9 scratch.
 *
 * INIT pre-decrements both pointers by one limb because CORE uses the
 * update-form load/store with a one-limb displacement; STOP adds the limb
 * back before storing the pointers.
 *
 * Every MULADDC_STOP clobbers "memory" because CORE loads through s and d
 * and stores through d, which the "m" operands (the pointer variables
 * themselves) do not describe.
 */
# if defined(__powerpc64__) || defined(__ppc64__)

#  if defined(__MACH__) && defined(__APPLE__)

#   define MULADDC_INIT                 \
    asm(                                \
        "ld r3, %3 \n\t"                \
        "ld r4, %4 \n\t"                \
        "ld r5, %5 \n\t"                \
        "ld r6, %6 \n\t"                \
        "addi r3, r3, -8 \n\t"          \
        "addi r4, r4, -8 \n\t"          \
        "addic r5, r5, 0 \n\t"

#   define MULADDC_CORE                 \
        "ldu r7, 8(r3) \n\t"            \
        "mulld r8, r7, r6 \n\t"         \
        "mulhdu r9, r7, r6 \n\t"        \
        "adde r8, r8, r5 \n\t"          \
        "ld r7, 8(r4) \n\t"             \
        "addze r5, r9 \n\t"             \
        "addc r8, r8, r7 \n\t"          \
        "stdu r8, 8(r4) \n\t"

#   define MULADDC_STOP                                         \
        "addze r5, r5 \n\t"                                     \
        "addi r4, r4, 8 \n\t"                                   \
        "addi r3, r3, 8 \n\t"                                   \
        "std r5, %0 \n\t"                                       \
        "std r4, %1 \n\t"                                       \
        "std r3, %2 \n\t"                                       \
        : "=m" (c), "=m" (d), "=m" (s)                          \
        : "m" (s), "m" (d), "m" (c), "m" (b)                    \
        : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "memory"    \
    );

#  else /* __MACH__ && __APPLE__ */

#   define MULADDC_INIT                 \
    asm(                                \
        "ld %%r3, %3 \n\t"              \
        "ld %%r4, %4 \n\t"              \
        "ld %%r5, %5 \n\t"              \
        "ld %%r6, %6 \n\t"              \
        "addi %%r3, %%r3, -8 \n\t"      \
        "addi %%r4, %%r4, -8 \n\t"      \
        "addic %%r5, %%r5, 0 \n\t"

#   define MULADDC_CORE                     \
        "ldu %%r7, 8(%%r3) \n\t"            \
        "mulld %%r8, %%r7, %%r6 \n\t"       \
        "mulhdu %%r9, %%r7, %%r6 \n\t"      \
        "adde %%r8, %%r8, %%r5 \n\t"        \
        "ld %%r7, 8(%%r4) \n\t"             \
        "addze %%r5, %%r9 \n\t"             \
        "addc %%r8, %%r8, %%r7 \n\t"        \
        "stdu %%r8, 8(%%r4) \n\t"

#   define MULADDC_STOP                                         \
        "addze %%r5, %%r5 \n\t"                                 \
        "addi %%r4, %%r4, 8 \n\t"                               \
        "addi %%r3, %%r3, 8 \n\t"                               \
        "std %%r5, %0 \n\t"                                     \
        "std %%r4, %1 \n\t"                                     \
        "std %%r3, %2 \n\t"                                     \
        : "=m" (c), "=m" (d), "=m" (s)                          \
        : "m" (s), "m" (d), "m" (c), "m" (b)                    \
        : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "memory"    \
    );

#  endif /* __MACH__ && __APPLE__ */

# elif defined(__powerpc__) || defined(__ppc__) /* end PPC64/begin PPC32 */

#  if defined(__MACH__) && defined(__APPLE__)

#   define MULADDC_INIT                 \
    asm(                                \
        "lwz r3, %3 \n\t"               \
        "lwz r4, %4 \n\t"               \
        "lwz r5, %5 \n\t"               \
        "lwz r6, %6 \n\t"               \
        "addi r3, r3, -4 \n\t"          \
        "addi r4, r4, -4 \n\t"          \
        "addic r5, r5, 0 \n\t"

#   define MULADDC_CORE                 \
        "lwzu r7, 4(r3) \n\t"           \
        "mullw r8, r7, r6 \n\t"         \
        "mulhwu r9, r7, r6 \n\t"        \
        "adde r8, r8, r5 \n\t"          \
        "lwz r7, 4(r4) \n\t"            \
        "addze r5, r9 \n\t"             \
        "addc r8, r8, r7 \n\t"          \
        "stwu r8, 4(r4) \n\t"

#   define MULADDC_STOP                                         \
        "addze r5, r5 \n\t"                                     \
        "addi r4, r4, 4 \n\t"                                   \
        "addi r3, r3, 4 \n\t"                                   \
        "stw r5, %0 \n\t"                                       \
        "stw r4, %1 \n\t"                                       \
        "stw r3, %2 \n\t"                                       \
        : "=m" (c), "=m" (d), "=m" (s)                          \
        : "m" (s), "m" (d), "m" (c), "m" (b)                    \
        : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "memory"    \
    );

#  else /* __MACH__ && __APPLE__ */

#   define MULADDC_INIT                 \
    asm(                                \
        "lwz %%r3, %3 \n\t"             \
        "lwz %%r4, %4 \n\t"             \
        "lwz %%r5, %5 \n\t"             \
        "lwz %%r6, %6 \n\t"             \
        "addi %%r3, %%r3, -4 \n\t"      \
        "addi %%r4, %%r4, -4 \n\t"      \
        "addic %%r5, %%r5, 0 \n\t"

#   define MULADDC_CORE                     \
        "lwzu %%r7, 4(%%r3) \n\t"           \
        "mullw %%r8, %%r7, %%r6 \n\t"       \
        "mulhwu %%r9, %%r7, %%r6 \n\t"      \
        "adde %%r8, %%r8, %%r5 \n\t"        \
        "lwz %%r7, 4(%%r4) \n\t"            \
        "addze %%r5, %%r9 \n\t"             \
        "addc %%r8, %%r8, %%r7 \n\t"        \
        "stwu %%r8, 4(%%r4) \n\t"

#   define MULADDC_STOP                                         \
        "addze %%r5, %%r5 \n\t"                                 \
        "addi %%r4, %%r4, 4 \n\t"                               \
        "addi %%r3, %%r3, 4 \n\t"                               \
        "stw %%r5, %0 \n\t"                                     \
        "stw %%r4, %1 \n\t"                                     \
        "stw %%r3, %2 \n\t"                                     \
        : "=m" (c), "=m" (d), "=m" (s)                          \
        : "m" (s), "m" (d), "m" (c), "m" (b)                    \
        : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "memory"    \
    );

#  endif /* __MACH__ && __APPLE__ */

# endif /* PPC32 */
Paul Bakker5121ce52009-01-03 21:22:43 +0000472
/*
 * The Sparc(64) assembly is reported to be broken.
 * Disable it for now, until we're able to fix it.
 *
 * The `0 &&` below keeps the whole section compiled out.  The code is kept
 * for reference only; the reported breakage has NOT been addressed here.
 *
 * Operands (defined in each MULADDC_STOP):
 *   outputs: %0 = c, %1 = d, %2 = s
 *   inputs:  %3 = s, %4 = d, %5 = c, %6 = b
 * Register use: %o0 = s, %o1 = d, %o2 = carry, %o3 = b, %o4/%o5/%g1 scratch.
 * The two variants differ only in using ldx/stx instead of ld/st for the
 * pointers s and d.
 *
 * "memory" is listed as a clobber because CORE loads through s and d and
 * stores through d, which the "m" operands do not describe.
 */
# if 0 && defined(__sparc__)
#  if defined(__sparc64__)

#   define MULADDC_INIT                 \
    asm(                                \
        "ldx %3, %%o0 \n\t"             \
        "ldx %4, %%o1 \n\t"             \
        "ld %5, %%o2 \n\t"              \
        "ld %6, %%o3 \n\t"

#   define MULADDC_CORE                     \
        "ld [%%o0], %%o4 \n\t"              \
        "inc 4, %%o0 \n\t"                  \
        "ld [%%o1], %%o5 \n\t"              \
        "umul %%o3, %%o4, %%o4 \n\t"        \
        "addcc %%o4, %%o2, %%o4 \n\t"       \
        "rd %%y, %%g1 \n\t"                 \
        "addx %%g1, 0, %%g1 \n\t"           \
        "addcc %%o4, %%o5, %%o4 \n\t"       \
        "st %%o4, [%%o1] \n\t"              \
        "addx %%g1, 0, %%o2 \n\t"           \
        "inc 4, %%o1 \n\t"

#   define MULADDC_STOP                             \
        "st %%o2, %0 \n\t"                          \
        "stx %%o1, %1 \n\t"                         \
        "stx %%o0, %2 \n\t"                         \
        : "=m" (c), "=m" (d), "=m" (s)              \
        : "m" (s), "m" (d), "m" (c), "m" (b)        \
        : "g1", "o0", "o1", "o2", "o3", "o4",       \
          "o5", "memory"                            \
    );

#  else /* __sparc64__ */

#   define MULADDC_INIT                 \
    asm(                                \
        "ld %3, %%o0 \n\t"              \
        "ld %4, %%o1 \n\t"              \
        "ld %5, %%o2 \n\t"              \
        "ld %6, %%o3 \n\t"

#   define MULADDC_CORE                     \
        "ld [%%o0], %%o4 \n\t"              \
        "inc 4, %%o0 \n\t"                  \
        "ld [%%o1], %%o5 \n\t"              \
        "umul %%o3, %%o4, %%o4 \n\t"        \
        "addcc %%o4, %%o2, %%o4 \n\t"       \
        "rd %%y, %%g1 \n\t"                 \
        "addx %%g1, 0, %%g1 \n\t"           \
        "addcc %%o4, %%o5, %%o4 \n\t"       \
        "st %%o4, [%%o1] \n\t"              \
        "addx %%g1, 0, %%o2 \n\t"           \
        "inc 4, %%o1 \n\t"

#   define MULADDC_STOP                             \
        "st %%o2, %0 \n\t"                          \
        "st %%o1, %1 \n\t"                          \
        "st %%o0, %2 \n\t"                          \
        : "=m" (c), "=m" (d), "=m" (s)              \
        : "m" (s), "m" (d), "m" (c), "m" (b)        \
        : "g1", "o0", "o1", "o2", "o3", "o4",       \
          "o5", "memory"                            \
    );

#  endif /* __sparc64__ */
# endif /* __sparc__ */
Paul Bakker5121ce52009-01-03 21:22:43 +0000544
/*
 * MicroBlaze implementation.
 *
 * The fragments are pasted together as MULADDC_INIT MULADDC_CORE...
 * MULADDC_STOP to form ONE asm statement.
 * Operands (defined in MULADDC_STOP):
 *   outputs: %0 = c, %1 = d, %2 = s
 *   inputs:  %3 = s, %4 = d, %5 = c, %6 = b
 * Register use: r3 = s, r4 = d, r5 = carry; b is split into its low
 * 16 bits (r7) and high 16 bits (r6); r8-r13 are scratch.
 */
# if defined(__microblaze__) || defined(microblaze)

/* Load the operands and split b into 16-bit halves. */
#  define MULADDC_INIT                  \
    asm(                                \
        "lwi r3, %3 \n\t"               \
        "lwi r4, %4 \n\t"               \
        "lwi r5, %5 \n\t"               \
        "lwi r6, %6 \n\t"               \
        "andi r7, r6, 0xffff \n\t"      \
        "bsrli r6, r6, 16 \n\t"

/*
 * One limb: read *s as two 16-bit halves, form the four 16x16 partial
 * products, combine them into r13:r12, add *d and the carry, store the low
 * word to *d and keep the high word in r5.  s and d each advance by 4 bytes.
 */
#  define MULADDC_CORE                  \
        "lhui r8, r3, 0 \n\t"           \
        "addi r3, r3, 2 \n\t"           \
        "lhui r9, r3, 0 \n\t"           \
        "addi r3, r3, 2 \n\t"           \
        "mul r10, r9, r6 \n\t"          \
        "mul r11, r8, r7 \n\t"          \
        "mul r12, r9, r7 \n\t"          \
        "mul r13, r8, r6 \n\t"          \
        "bsrli r8, r10, 16 \n\t"        \
        "bsrli r9, r11, 16 \n\t"        \
        "add r13, r13, r8 \n\t"         \
        "add r13, r13, r9 \n\t"         \
        "bslli r10, r10, 16 \n\t"       \
        "bslli r11, r11, 16 \n\t"       \
        "add r12, r12, r10 \n\t"        \
        "addc r13, r13, r0 \n\t"        \
        "add r12, r12, r11 \n\t"        \
        "addc r13, r13, r0 \n\t"        \
        "lwi r10, r4, 0 \n\t"           \
        "add r12, r12, r10 \n\t"        \
        "addc r13, r13, r0 \n\t"        \
        "add r12, r12, r5 \n\t"         \
        "addc r5, r13, r0 \n\t"         \
        "swi r12, r4, 0 \n\t"           \
        "addi r4, r4, 4 \n\t"

/*
 * Write c (r5), d (r4) and s (r3) back and close the asm statement.
 * "memory" is clobbered because CORE loads through s and d and stores
 * through d, which the "m" operands (the pointer variables themselves)
 * do not describe.
 */
#  define MULADDC_STOP                                  \
        "swi r5, %0 \n\t"                               \
        "swi r4, %1 \n\t"                               \
        "swi r3, %2 \n\t"                               \
        : "=m" (c), "=m" (d), "=m" (s)                  \
        : "m" (s), "m" (d), "m" (c), "m" (b)            \
        : "r3", "r4", "r5", "r6", "r7", "r8",           \
          "r9", "r10", "r11", "r12", "r13", "memory"    \
    );

# endif /* MicroBlaze */
Paul Bakker5121ce52009-01-03 21:22:43 +0000594
/*
 * TriCore implementation.
 *
 * The fragments are pasted together as MULADDC_INIT MULADDC_CORE...
 * MULADDC_STOP to form ONE asm statement.
 * Operands (defined in MULADDC_STOP):
 *   outputs: %0 = c, %1 = d, %2 = s
 *   inputs:  %3 = s, %4 = d, %5 = c, %6 = b
 * Register use: %a2 = s, %a3 = d, %d4 = carry, %d1 = b.
 *
 * NOTE(review): %d5 is written by INIT (`xor %%d5, %%d5`) but does not
 * appear in the clobber list - confirm whether the toolchain needs it.
 */
# if defined(__tricore__)

#  define MULADDC_INIT              \
    asm(                            \
        "ld.a %%a2, %3 \n\t"        \
        "ld.a %%a3, %4 \n\t"        \
        "ld.w %%d4, %5 \n\t"        \
        "ld.w %%d1, %6 \n\t"        \
        "xor %%d5, %%d5 \n\t"

/*
 * One limb: load *s (post-increment), multiply-accumulate with b and the
 * carry into %e2, add *d, move the high word to %d4 as the new carry and
 * store the low word to *d (post-increment).
 */
#  define MULADDC_CORE                          \
        "ld.w %%d0, [%%a2+] \n\t"               \
        "madd.u %%e2, %%e4, %%d0, %%d1 \n\t"    \
        "ld.w %%d0, [%%a3] \n\t"                \
        "addx %%d2, %%d2, %%d0 \n\t"            \
        "addc %%d3, %%d3, 0 \n\t"               \
        "mov %%d4, %%d3 \n\t"                   \
        "st.w [%%a3+], %%d2 \n\t"

/*
 * Write c (%d4), d (%a3) and s (%a2) back and close the asm statement.
 * "memory" is clobbered because CORE loads through s and d and stores
 * through d, which the "m" operands (the pointer variables themselves)
 * do not describe.
 */
#  define MULADDC_STOP                                      \
        "st.w %0, %%d4 \n\t"                                \
        "st.a %1, %%a3 \n\t"                                \
        "st.a %2, %%a2 \n\t"                                \
        : "=m" (c), "=m" (d), "=m" (s)                      \
        : "m" (s), "m" (d), "m" (c), "m" (b)                \
        : "d0", "d1", "e2", "d4", "a2", "a3", "memory"      \
    );

# endif /* TriCore */
Paul Bakker5121ce52009-01-03 21:22:43 +0000624
/*
 * Note, gcc -O0 by default uses r7 for the frame pointer, so it complains about
 * our use of r7 below, unless -fomit-frame-pointer is passed.
 *
 * On the other hand, -fomit-frame-pointer is implied by any -Ox options with
 * x !=0, which we can detect using __OPTIMIZE__ (which is also defined by
 * clang and armcc5 under the same conditions).
 *
 * So, only use the optimized assembly below for optimized build, which avoids
 * the build error and is pretty reasonable anyway.
 */
# if defined(__GNUC__) && !defined(__OPTIMIZE__)
#  define MULADDC_CANNOT_USE_R7
# endif

/*
 * 32-bit ARM implementation, in three variants:
 *   - Thumb-1 (__thumb__ without __thumb2__): 16x16 partial products;
 *   - ARMv6+ with the DSP extension: a single `umaal` per limb;
 *   - everything else: `umlal`.
 *
 * The fragments are pasted together as MULADDC_INIT MULADDC_CORE...
 * MULADDC_STOP to form ONE asm statement.
 *
 * Thumb-1 and generic variants use memory operands:
 *   outputs: %0 = c, %1 = d, %2 = s
 *   inputs:  %3 = s, %4 = d, %5 = c, %6 = b
 * with r0 = s, r1 = d, r2 = carry, r3 = b.  Their MULADDC_STOP clobbers
 * "memory" because CORE loads through s and d and stores through d, which
 * the "m" operands (the pointer variables themselves) do not describe.
 *
 * The DSP variant uses register operands: %0 = s, %1 = d, %2 = c (outputs
 * tied to the same-numbered inputs) and %3 = b.
 */
# if defined(__arm__) && !defined(MULADDC_CANNOT_USE_R7)

#  if defined(__thumb__) && !defined(__thumb2__)

/* Load the operands; keep the high half of b in r9 and the low half in r8. */
#   define MULADDC_INIT                 \
    asm(                                \
        "ldr r0, %3 \n\t"               \
        "ldr r1, %4 \n\t"               \
        "ldr r2, %5 \n\t"               \
        "ldr r3, %6 \n\t"               \
        "lsr r7, r3, #16 \n\t"          \
        "mov r9, r7 \n\t"               \
        "lsl r7, r3, #16 \n\t"          \
        "lsr r7, r7, #16 \n\t"          \
        "mov r8, r7 \n\t"

/*
 * One limb: split *s++ into 16-bit halves, form the four partial products,
 * accumulate them with the carry (r2) and *d into r5:r4, store r4 to *d++
 * and leave the new carry in r2.
 */
#   define MULADDC_CORE                 \
        "ldmia r0!, {r6} \n\t"          \
        "lsr r7, r6, #16 \n\t"          \
        "lsl r6, r6, #16 \n\t"          \
        "lsr r6, r6, #16 \n\t"          \
        "mov r4, r8 \n\t"               \
        "mul r4, r6 \n\t"               \
        "mov r3, r9 \n\t"               \
        "mul r6, r3 \n\t"               \
        "mov r5, r9 \n\t"               \
        "mul r5, r7 \n\t"               \
        "mov r3, r8 \n\t"               \
        "mul r7, r3 \n\t"               \
        "lsr r3, r6, #16 \n\t"          \
        "add r5, r5, r3 \n\t"           \
        "lsr r3, r7, #16 \n\t"          \
        "add r5, r5, r3 \n\t"           \
        "add r4, r4, r2 \n\t"           \
        "mov r2, #0 \n\t"               \
        "adc r5, r2 \n\t"               \
        "lsl r3, r6, #16 \n\t"          \
        "add r4, r4, r3 \n\t"           \
        "adc r5, r2 \n\t"               \
        "lsl r3, r7, #16 \n\t"          \
        "add r4, r4, r3 \n\t"           \
        "adc r5, r2 \n\t"               \
        "ldr r3, [r1] \n\t"             \
        "add r4, r4, r3 \n\t"           \
        "adc r2, r5 \n\t"               \
        "stmia r1!, {r4} \n\t"

#   define MULADDC_STOP                                 \
        "str r2, %0 \n\t"                               \
        "str r1, %1 \n\t"                               \
        "str r0, %2 \n\t"                               \
        : "=m" (c), "=m" (d), "=m" (s)                  \
        : "m" (s), "m" (d), "m" (c), "m" (b)            \
        : "r0", "r1", "r2", "r3", "r4", "r5",           \
          "r6", "r7", "r8", "r9", "cc", "memory"        \
    );

#  elif (__ARM_ARCH >= 6) && defined(__ARM_FEATURE_DSP) && \
        (__ARM_FEATURE_DSP == 1)

/* Only opens the asm statement; there is no set-up code. */
#   define MULADDC_INIT \
    asm(

/* One limb: r0 = *s++, r1 = *d, umaal folds b * r0 + r1 + c, *d++ = r1. */
#   define MULADDC_CORE                 \
        "ldr r0, [%0], #4 \n\t"         \
        "ldr r1, [%1] \n\t"             \
        "umaal r1, %2, %3, r0 \n\t"     \
        "str r1, [%1], #4 \n\t"

#   define MULADDC_STOP                                 \
        : "=r" (s), "=r" (d), "=r" (c)                  \
        : "r" (b), "0" (s), "1" (d), "2" (c)            \
        : "r0", "r1", "memory"                          \
    );

#  else

#   define MULADDC_INIT                 \
    asm(                                \
        "ldr r0, %3 \n\t"               \
        "ldr r1, %4 \n\t"               \
        "ldr r2, %5 \n\t"               \
        "ldr r3, %6 \n\t"

/*
 * One limb: r5:r2 = carry + b * *s++ (umlal), add *d, store the low word
 * to *d++ and leave the new carry in r2.
 */
#   define MULADDC_CORE                 \
        "ldr r4, [r0], #4 \n\t"         \
        "mov r5, #0 \n\t"               \
        "ldr r6, [r1] \n\t"             \
        "umlal r2, r5, r3, r4 \n\t"     \
        "adds r7, r6, r2 \n\t"          \
        "adc r2, r5, #0 \n\t"           \
        "str r7, [r1], #4 \n\t"

#   define MULADDC_STOP                                 \
        "str r2, %0 \n\t"                               \
        "str r1, %1 \n\t"                               \
        "str r0, %2 \n\t"                               \
        : "=m" (c), "=m" (d), "=m" (s)                  \
        : "m" (s), "m" (d), "m" (c), "m" (b)            \
        : "r0", "r1", "r2", "r3", "r4", "r5",           \
          "r6", "r7", "cc", "memory"                    \
    );

#  endif /* Thumb */

# endif /* ARMv3 */
Paul Bakker5121ce52009-01-03 21:22:43 +0000746
Mateusz Starzykc0eabdc2021-08-03 14:09:02 +0200747# if defined(__alpha__)
Paul Bakker5121ce52009-01-03 21:22:43 +0000748
/*
 * Alpha variant.
 *
 * MULADDC_INIT, one or more MULADDC_CORE and MULADDC_STOP are written back
 * to back by the user; together they expand to one asm statement.
 * Operands: %0 = c, %1 = d, %2 = s (memory outputs), %3 = s, %4 = d, %5 = c,
 * %6 = b (memory inputs).
 *
 * MULADDC_INIT loads the working registers: $1 = s, $2 = d, $3 = c, $4 = b.
 */
# define MULADDC_INIT \
    asm( \
    "ldq $1, %3 \n\t" \
    "ldq $2, %4 \n\t" \
    "ldq $3, %5 \n\t" \
    "ldq $4, %6 \n\t"

/*
 * One limb:
 *   $6 = *s, then s ($1) advances by 8 bytes
 *   $7 = low half of $6 * b, $6 = high half (UMULH)
 *   $7 += c, $3 = carry out of that addition
 *   $7 += *d, $5 = carry out of that addition
 *   *d = $7, then d ($2) advances by 8 bytes
 *   c ($3) = high half + both carries
 */
# define MULADDC_CORE \
    "ldq $6, 0($1) \n\t" \
    "addq $1, 8, $1 \n\t" \
    "mulq $6, $4, $7 \n\t" \
    "umulh $6, $4, $6 \n\t" \
    "addq $7, $3, $7 \n\t" \
    "cmpult $7, $3, $3 \n\t" \
    "ldq $5, 0($2) \n\t" \
    "addq $7, $5, $7 \n\t" \
    "cmpult $7, $5, $5 \n\t" \
    "stq $7, 0($2) \n\t" \
    "addq $2, 8, $2 \n\t" \
    "addq $6, $3, $3 \n\t" \
    "addq $5, $3, $3 \n\t"

/*
 * Write $3, $2, $1 back to c, d, s and close the asm statement.
 * "memory" is listed because MULADDC_CORE stores through $2 into the
 * destination limbs, which are not covered by any operand.
 */
# define MULADDC_STOP \
    "stq $3, %0 \n\t" \
    "stq $2, %1 \n\t" \
    "stq $1, %2 \n\t" \
    : "=m" (c), "=m" (d), "=m" (s) \
    : "m" (s), "m" (d), "m" (c), "m" (b) \
    : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "memory" \
    );
# endif /* Alpha */

# if defined(__mips__) && !defined(__mips64)

/*
 * MIPS32 variant.
 *
 * MULADDC_INIT, one or more MULADDC_CORE and MULADDC_STOP are written back
 * to back by the user; together they expand to one asm statement.
 * Operands: %0 = c, %1 = d, %2 = s (memory outputs), %3 = s, %4 = d, %5 = c,
 * %6 = b (memory inputs).
 *
 * MULADDC_INIT loads the working registers:
 * $10 = s, $11 = d, $12 = c, $13 = b.
 */
# define MULADDC_INIT \
    asm( \
    "lw $10, %3 \n\t" \
    "lw $11, %4 \n\t" \
    "lw $12, %5 \n\t" \
    "lw $13, %6 \n\t"

/*
 * One limb:
 *   $14 = *s, then s ($10) advances by 4 bytes
 *   hi:lo = b * $14; $14 = lo, $9 = hi
 *   $14 += c, $12 = carry out of that addition
 *   $15 = *d + $14, $14 = carry out of that addition
 *   *d = $15, then d ($11) advances by 4 bytes
 *   c ($12) = hi + both carries
 */
# define MULADDC_CORE \
    "lw $14, 0($10) \n\t" \
    "multu $13, $14 \n\t" \
    "addi $10, $10, 4 \n\t" \
    "mflo $14 \n\t" \
    "mfhi $9 \n\t" \
    "addu $14, $12, $14 \n\t" \
    "lw $15, 0($11) \n\t" \
    "sltu $12, $14, $12 \n\t" \
    "addu $15, $14, $15 \n\t" \
    "sltu $14, $15, $14 \n\t" \
    "addu $12, $12, $9 \n\t" \
    "sw $15, 0($11) \n\t" \
    "addu $12, $12, $14 \n\t" \
    "addi $11, $11, 4 \n\t"

/*
 * Write $12, $11, $10 back to c, d, s and close the asm statement.
 * "memory" is listed because MULADDC_CORE stores through $11 into the
 * destination limbs, which are not covered by any operand.
 */
# define MULADDC_STOP \
    "sw $12, %0 \n\t" \
    "sw $11, %1 \n\t" \
    "sw $10, %2 \n\t" \
    : "=m" (c), "=m" (d), "=m" (s) \
    : "m" (s), "m" (d), "m" (c), "m" (b) \
    : "$9", "$10", "$11", "$12", "$13", "$14", "$15", "lo", "hi", \
      "memory" \
    );

# endif /* MIPS */
# endif /* GNUC */

# if (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)

/*
 * MSVC / Watcom inline-assembler variant (32-bit x86).
 *
 * MULADDC_INIT loads the working registers:
 * esi = s, edi = d, ecx = c, ebx = b.
 */
# define MULADDC_INIT \
    __asm mov esi, s \
    __asm mov edi, d \
    __asm mov ecx, c \
    __asm mov ebx, b

/*
 * One limb:
 *   eax = *s, esi advances            (lodsd)
 *   edx:eax = eax * b                 (mul ebx)
 *   edx:eax += c
 *   edx:eax += *d
 *   c (ecx) = edx
 *   *d = eax, edi advances            (stosd)
 */
# define MULADDC_CORE \
    __asm lodsd \
    __asm mul ebx \
    __asm add eax, ecx \
    __asm adc edx, 0 \
    __asm add eax, [edi] \
    __asm adc edx, 0 \
    __asm mov ecx, edx \
    __asm stosd

# if defined(MBEDTLS_HAVE_SSE2)

/* Emit one raw opcode byte through the inline assembler. */
# define EMIT __asm _emit

/*
 * Eight-limb step, written as raw opcode bytes (MMX-register movd, paddq,
 * pmuludq and psrlq encodings) so that it does not depend on assembler
 * support for those mnemonics. One encoded instruction per line.
 * Uses the register assignment set up by MULADDC_INIT (esi = s, edi = d,
 * ecx = c, ebx = b); near the end it adds 0x20 to edi and to esi and moves
 * the carry back into ecx.
 */
# define MULADDC_HUIT \
    EMIT 0x0F EMIT 0x6E EMIT 0xC9 \
    EMIT 0x0F EMIT 0x6E EMIT 0xC3 \
    EMIT 0x0F EMIT 0x6E EMIT 0x1F \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCB \
    EMIT 0x0F EMIT 0x6E EMIT 0x16 \
    EMIT 0x0F EMIT 0xF4 EMIT 0xD0 \
    EMIT 0x0F EMIT 0x6E EMIT 0x66 EMIT 0x04 \
    EMIT 0x0F EMIT 0xF4 EMIT 0xE0 \
    EMIT 0x0F EMIT 0x6E EMIT 0x76 EMIT 0x08 \
    EMIT 0x0F EMIT 0xF4 EMIT 0xF0 \
    EMIT 0x0F EMIT 0x6E EMIT 0x7E EMIT 0x0C \
    EMIT 0x0F EMIT 0xF4 EMIT 0xF8 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCA \
    EMIT 0x0F EMIT 0x6E EMIT 0x5F EMIT 0x04 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xDC \
    EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x08 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xEE \
    EMIT 0x0F EMIT 0x6E EMIT 0x67 EMIT 0x0C \
    EMIT 0x0F EMIT 0xD4 EMIT 0xFC \
    EMIT 0x0F EMIT 0x7E EMIT 0x0F \
    EMIT 0x0F EMIT 0x6E EMIT 0x56 EMIT 0x10 \
    EMIT 0x0F EMIT 0xF4 EMIT 0xD0 \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0x6E EMIT 0x66 EMIT 0x14 \
    EMIT 0x0F EMIT 0xF4 EMIT 0xE0 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCB \
    EMIT 0x0F EMIT 0x6E EMIT 0x76 EMIT 0x18 \
    EMIT 0x0F EMIT 0xF4 EMIT 0xF0 \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x04 \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0x6E EMIT 0x5E EMIT 0x1C \
    EMIT 0x0F EMIT 0xF4 EMIT 0xD8 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCD \
    EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x10 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xD5 \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x08 \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCF \
    EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x14 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xE5 \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x0C \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCA \
    EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x18 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xF5 \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x10 \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCC \
    EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x1C \
    EMIT 0x0F EMIT 0xD4 EMIT 0xDD \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x14 \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCE \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x18 \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCB \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x1C \
    EMIT 0x83 EMIT 0xC7 EMIT 0x20 \
    EMIT 0x83 EMIT 0xC6 EMIT 0x20 \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0x7E EMIT 0xC9

/*
 * Emit 0x0F 0x77 (EMMS, leaves MMX state), then write ecx, edi, esi back
 * to c, d, s.
 */
# define MULADDC_STOP \
    EMIT 0x0F EMIT 0x77 \
    __asm mov c, ecx \
    __asm mov d, edi \
    __asm mov s, esi

# else

/* Write ecx, edi, esi back to c, d, s. */
# define MULADDC_STOP \
    __asm mov c, ecx \
    __asm mov d, edi \
    __asm mov s, esi

# endif /* SSE2 */
# endif /* MSVC */

#endif /* MBEDTLS_HAVE_ASM */

#if !defined(MULADDC_CORE)
# if defined(MBEDTLS_HAVE_UDBL)

/*
 * Portable C variant using a double-width integer type.
 *
 * The macros expect s, d, c and b to be in scope. MULADDC_INIT opens a
 * block and declares the temporaries, each MULADDC_CORE handles one limb,
 * MULADDC_STOP closes the block.
 */
# define MULADDC_INIT \
    { \
        mbedtls_t_udbl r; \
        mbedtls_mpi_uint r0, r1;

/*
 * One limb: r1:r0 = *s * b + c + *d, then *d = r0 and c = r1;
 * s and d each advance by one limb. Every addition into r0 is followed by
 * a wrap-around test (result smaller than the addend) that feeds r1.
 */
# define MULADDC_CORE \
    r = *(s++) * (mbedtls_t_udbl)b; \
    r0 = (mbedtls_mpi_uint)r; \
    r1 = (mbedtls_mpi_uint)(r >> biL); \
    r0 += c; \
    r1 += (r0 < c); \
    r0 += *d; \
    r1 += (r0 < *d); \
    c = r1; \
    *(d++) = r0;

/* Close the block opened by MULADDC_INIT. */
# define MULADDC_STOP }

# else
/*
 * Portable C variant without a double-width type: the limb product is
 * built from four half-limb products (biH is the half-limb shift count).
 *
 * The macros expect s, d, c and b to be in scope. MULADDC_INIT opens a
 * block, declares the temporaries and splits b into its low half (b0) and
 * high half (b1); MULADDC_STOP closes the block.
 */
# define MULADDC_INIT \
    { \
        mbedtls_mpi_uint s0, s1, b0, b1; \
        mbedtls_mpi_uint r0, r1, rx, ry; \
        b0 = (b << biH) >> biH; \
        b1 = (b >> biH);

/*
 * One limb: r1:r0 = *s * b + c + *d, then *d = r0 and c = r1;
 * s and d each advance by one limb.
 *   s0 / s1       low / high half of *s
 *   r0, r1        s0*b0 and s1*b1 (low and high limb of the product)
 *   rx, ry        cross products s0*b1 and s1*b0; their high halves go
 *                 into r1, their low halves are shifted up and added to r0
 * Every addition into r0 is followed by a wrap-around test (result smaller
 * than the addend) that feeds r1.
 */
# define MULADDC_CORE \
    s0 = (*s << biH) >> biH; \
    s1 = (*s >> biH); \
    s++; \
    rx = s0 * b1; \
    r0 = s0 * b0; \
    ry = s1 * b0; \
    r1 = s1 * b1; \
    r1 += (rx >> biH); \
    r1 += (ry >> biH); \
    rx <<= biH; \
    ry <<= biH; \
    r0 += rx; \
    r1 += (r0 < rx); \
    r0 += ry; \
    r1 += (r0 < ry); \
    r0 += c; \
    r1 += (r0 < c); \
    r0 += *d; \
    r1 += (r0 < *d); \
    c = r1; \
    *(d++) = r0;

/* Close the block opened by MULADDC_INIT. */
# define MULADDC_STOP }

# endif /* MBEDTLS_HAVE_UDBL: C (longlong) / C (generic) */
#endif /* !MULADDC_CORE */

#endif /* bn_mul.h */