/**
 * \file bn_mul.h
 *
 * \brief  Multi-precision integer library
 *
 *  Copyright (C) 2006-2010, Brainspark B.V.
 *
 *  This file is part of PolarSSL (http://www.polarssl.org)
 *  Lead Maintainer: Paul Bakker <polarssl_maintainer at polarssl.org>
 *
 *  All rights reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
/*
 *      Multiply source vector [s] with b, add result
 *       to destination vector [d] and set carry c.
 *
 *      Currently supports:
 *
 *         . IA-32 (386+)         . AMD64 / EM64T
 *         . IA-32 (SSE2)         . Motorola 68000
 *         . PowerPC, 32-bit      . MicroBlaze
 *         . PowerPC, 64-bit      . TriCore
 *         . SPARC v8             . ARM v3+
 *         . Alpha                . MIPS32
 *         . C, longlong          . C, generic
 */
Paul Bakker40e46942009-01-03 21:51:57 +000041#ifndef POLARSSL_BN_MUL_H
42#define POLARSSL_BN_MUL_H
Paul Bakker5121ce52009-01-03 21:22:43 +000043
Paul Bakker66219872012-01-22 20:38:13 +000044#include "bignum.h"
Paul Bakker5121ce52009-01-03 21:22:43 +000045
Paul Bakker40e46942009-01-03 21:51:57 +000046#if defined(POLARSSL_HAVE_ASM)
Paul Bakker5121ce52009-01-03 21:22:43 +000047
48#if defined(__GNUC__)
/*
 * IA-32, GCC-style extended inline assembly.
 *
 * The macros are textual fragments of ONE asm statement: MULADDC_INIT opens
 * it, any number of MULADDC_CORE (one limb each) and, with
 * POLARSSL_HAVE_SSE2, MULADDC_HUIT (eight limbs each) fragments follow, and
 * MULADDC_STOP appends the operand lists and closes the statement.  The
 * expansion site must provide the lvalues t, c, d, s and b.
 *
 * Operand map: %0 / %4 = t, %1 / %7 = c, %2 / %6 = d, %3 / %5 = s, %8 = b.
 * Register map: esi = s, edi = d, ecx = running carry, ebx = b.  The
 * caller's ebx is parked in t by INIT and reloaded by STOP rather than being
 * named in the clobber list.
 *
 * __asm__ is used instead of asm so the header also builds with strict
 * -std=c99/c11; instructions are separated by "\n\t" because ';' is not a
 * statement separator in every assembler dialect; "memory" tells the
 * compiler that limbs behind s and d are read/written by the statement.
 */
#if defined(__i386__)

#define MULADDC_INIT                        \
    __asm__(                                \
        "movl   %%ebx, %0           \n\t"   \
        "movl   %5, %%esi           \n\t"   \
        "movl   %6, %%edi           \n\t"   \
        "movl   %7, %%ecx           \n\t"   \
        "movl   %8, %%ebx           \n\t"

/* One limb: edx:eax = *s++ * b; add carry and *d; store low word to *d++. */
#define MULADDC_CORE                        \
        "lodsl                      \n\t"   \
        "mull   %%ebx               \n\t"   \
        "addl   %%ecx,   %%eax      \n\t"   \
        "adcl   $0,      %%edx      \n\t"   \
        "addl   (%%edi), %%eax      \n\t"   \
        "adcl   $0,      %%edx      \n\t"   \
        "movl   %%edx,   %%ecx      \n\t"   \
        "stosl                      \n\t"

#if defined(POLARSSL_HAVE_SSE2)

/*
 * Eight limbs at once using pmuludq/paddq on the MMX registers; advances
 * esi and edi by 32 bytes and leaves the carry in ecx.
 */
#define MULADDC_HUIT                            \
        "movd     %%ecx,     %%mm1      \n\t"   \
        "movd     %%ebx,     %%mm0      \n\t"   \
        "movd     (%%edi),   %%mm3      \n\t"   \
        "paddq    %%mm3,     %%mm1      \n\t"   \
        "movd     (%%esi),   %%mm2      \n\t"   \
        "pmuludq  %%mm0,     %%mm2      \n\t"   \
        "movd     4(%%esi),  %%mm4      \n\t"   \
        "pmuludq  %%mm0,     %%mm4      \n\t"   \
        "movd     8(%%esi),  %%mm6      \n\t"   \
        "pmuludq  %%mm0,     %%mm6      \n\t"   \
        "movd     12(%%esi), %%mm7      \n\t"   \
        "pmuludq  %%mm0,     %%mm7      \n\t"   \
        "paddq    %%mm2,     %%mm1      \n\t"   \
        "movd     4(%%edi),  %%mm3      \n\t"   \
        "paddq    %%mm4,     %%mm3      \n\t"   \
        "movd     8(%%edi),  %%mm5      \n\t"   \
        "paddq    %%mm6,     %%mm5      \n\t"   \
        "movd     12(%%edi), %%mm4      \n\t"   \
        "paddq    %%mm4,     %%mm7      \n\t"   \
        "movd     %%mm1,     (%%edi)    \n\t"   \
        "movd     16(%%esi), %%mm2      \n\t"   \
        "pmuludq  %%mm0,     %%mm2      \n\t"   \
        "psrlq    $32,       %%mm1      \n\t"   \
        "movd     20(%%esi), %%mm4      \n\t"   \
        "pmuludq  %%mm0,     %%mm4      \n\t"   \
        "paddq    %%mm3,     %%mm1      \n\t"   \
        "movd     24(%%esi), %%mm6      \n\t"   \
        "pmuludq  %%mm0,     %%mm6      \n\t"   \
        "movd     %%mm1,     4(%%edi)   \n\t"   \
        "psrlq    $32,       %%mm1      \n\t"   \
        "movd     28(%%esi), %%mm3      \n\t"   \
        "pmuludq  %%mm0,     %%mm3      \n\t"   \
        "paddq    %%mm5,     %%mm1      \n\t"   \
        "movd     16(%%edi), %%mm5      \n\t"   \
        "paddq    %%mm5,     %%mm2      \n\t"   \
        "movd     %%mm1,     8(%%edi)   \n\t"   \
        "psrlq    $32,       %%mm1      \n\t"   \
        "paddq    %%mm7,     %%mm1      \n\t"   \
        "movd     20(%%edi), %%mm5      \n\t"   \
        "paddq    %%mm5,     %%mm4      \n\t"   \
        "movd     %%mm1,     12(%%edi)  \n\t"   \
        "psrlq    $32,       %%mm1      \n\t"   \
        "paddq    %%mm2,     %%mm1      \n\t"   \
        "movd     24(%%edi), %%mm5      \n\t"   \
        "paddq    %%mm5,     %%mm6      \n\t"   \
        "movd     %%mm1,     16(%%edi)  \n\t"   \
        "psrlq    $32,       %%mm1      \n\t"   \
        "paddq    %%mm4,     %%mm1      \n\t"   \
        "movd     28(%%edi), %%mm5      \n\t"   \
        "paddq    %%mm5,     %%mm3      \n\t"   \
        "movd     %%mm1,     20(%%edi)  \n\t"   \
        "psrlq    $32,       %%mm1      \n\t"   \
        "paddq    %%mm6,     %%mm1      \n\t"   \
        "movd     %%mm1,     24(%%edi)  \n\t"   \
        "psrlq    $32,       %%mm1      \n\t"   \
        "paddq    %%mm3,     %%mm1      \n\t"   \
        "movd     %%mm1,     28(%%edi)  \n\t"   \
        "addl     $32,       %%edi      \n\t"   \
        "addl     $32,       %%esi      \n\t"   \
        "psrlq    $32,       %%mm1      \n\t"   \
        "movd     %%mm1,     %%ecx      \n\t"

/* SSE2 build: leave MMX state (emms), restore ebx, write back c, d, s. */
#define MULADDC_STOP                        \
        "emms                       \n\t"   \
        "movl   %4, %%ebx           \n\t"   \
        "movl   %%ecx, %1           \n\t"   \
        "movl   %%edi, %2           \n\t"   \
        "movl   %%esi, %3           \n\t"   \
        : "=m" (t), "=m" (c), "=m" (d), "=m" (s)        \
        : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b)   \
        : "eax", "ecx", "edx", "esi", "edi",            \
          "cc", "memory"                                \
    );

#else

/* Non-SSE2 build: restore ebx, write back c, d, s. */
#define MULADDC_STOP                        \
        "movl   %4, %%ebx           \n\t"   \
        "movl   %%ecx, %1           \n\t"   \
        "movl   %%edi, %2           \n\t"   \
        "movl   %%esi, %3           \n\t"   \
        : "=m" (t), "=m" (c), "=m" (d), "=m" (s)        \
        : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b)   \
        : "eax", "ecx", "edx", "esi", "edi",            \
          "cc", "memory"                                \
    );
#endif /* SSE2 */
#endif /* i386 */
167
/*
 * AMD64 / EM64T, GCC-style extended inline assembly.
 *
 * MULADDC_INIT, any number of MULADDC_CORE fragments and MULADDC_STOP
 * concatenate into ONE asm statement.  The expansion site must provide the
 * lvalues c, d, s and b.
 *
 * Operand map: %0 / %5 = c, %1 / %4 = d, %2 / %3 = s, %6 = b.
 * Register map: rsi = s, rdi = d, rcx = running carry, rbx = b,
 * r8 = constant zero used to reset rcx inside each CORE step.
 *
 * __asm__ (not asm) keeps the header usable with strict -std=c99/c11;
 * "\n\t" separates instructions portably; "memory" declares that the limbs
 * behind s and d are accessed although only the pointers are operands.
 */
#if defined(__amd64__) || defined (__x86_64__)

#define MULADDC_INIT                        \
    __asm__(                                \
        "movq   %3, %%rsi           \n\t"   \
        "movq   %4, %%rdi           \n\t"   \
        "movq   %5, %%rcx           \n\t"   \
        "movq   %6, %%rbx           \n\t"   \
        "xorq   %%r8, %%r8          \n\t"

/*
 * One limb: rdx:rax = *s * b; add the incoming carry, accumulate the low
 * word into *d, and leave the new carry (high word plus both carry-outs)
 * in rcx.  movq does not alter flags, so the carry from the first addq
 * survives until the following adcq.
 */
#define MULADDC_CORE                        \
        "movq   (%%rsi), %%rax      \n\t"   \
        "mulq   %%rbx               \n\t"   \
        "addq   $8,      %%rsi      \n\t"   \
        "addq   %%rcx,   %%rax      \n\t"   \
        "movq   %%r8,    %%rcx      \n\t"   \
        "adcq   $0,      %%rdx      \n\t"   \
        "nop                        \n\t"   \
        "addq   %%rax,   (%%rdi)    \n\t"   \
        "adcq   %%rdx,   %%rcx      \n\t"   \
        "addq   $8,      %%rdi      \n\t"

/* Write the carry and the advanced pointers back, then close the asm. */
#define MULADDC_STOP                        \
        "movq   %%rcx, %0           \n\t"   \
        "movq   %%rdi, %1           \n\t"   \
        "movq   %%rsi, %2           \n\t"   \
        : "=m" (c), "=m" (d), "=m" (s)                      \
        : "m" (s), "m" (d), "m" (c), "m" (b)                \
        : "rax", "rcx", "rdx", "rbx", "rsi", "rdi", "r8",   \
          "cc", "memory"                                    \
    );

#endif /* AMD64 */
206
/*
 * Motorola 68020 / CPU32, GCC inline assembly.
 *
 * Unlike most other ports, every instruction here is its own asm statement;
 * the register contents are expected to survive from one statement to the
 * next.  The expansion site must provide the lvalues s, d, c and b.
 *
 * Register map (per MULADDC_INIT): a2 = s, a3 = d, d3 = carry, d2 = b,
 * d0 = 0.  The clobber list is attached to the last statement of
 * MULADDC_STOP only.
 *
 * __asm__ is used instead of asm so the header also builds with strict
 * -std=c99/c11.
 *
 * NOTE(review): nothing tells the compiler about a2/a3/d0-d4 between the
 * individual statements; confirm this is acceptable for the toolchains in use.
 */
#if defined(__mc68020__) || defined(__mcpu32__)

#define MULADDC_INIT                                    \
    __asm__( "movl   %0, %%a2       " :: "m" (s));      \
    __asm__( "movl   %0, %%a3       " :: "m" (d));      \
    __asm__( "movl   %0, %%d3       " :: "m" (c));      \
    __asm__( "movl   %0, %%d2       " :: "m" (b));      \
    __asm__( "moveq  #0, %d0        " );

/* One limb: d4:d1 = *s++ * b; add carry; accumulate into *d++; d3 = carry. */
#define MULADDC_CORE                                    \
    __asm__( "movel  %a2@+, %d1     " );                \
    __asm__( "mulul  %d2, %d4:%d1   " );                \
    __asm__( "addl   %d3, %d1       " );                \
    __asm__( "addxl  %d0, %d4       " );                \
    __asm__( "moveq  #0,  %d3       " );                \
    __asm__( "addl   %d1, %a3@+     " );                \
    __asm__( "addxl  %d4, %d3       " );

/* Store carry and advanced pointers back; declare the scratch registers. */
#define MULADDC_STOP                                    \
    __asm__( "movl   %%d3, %0       " : "=m" (c));      \
    __asm__( "movl   %%a3, %0       " : "=m" (d));      \
    __asm__( "movl   %%a2, %0       " : "=m" (s) ::     \
    "d0", "d1", "d2", "d3", "d4", "a2", "a3" );

/* Eight limbs, unrolled; the high word alternates between d4 and d3. */
#define MULADDC_HUIT                                    \
    __asm__( "movel  %a2@+, %d1     " );                \
    __asm__( "mulul  %d2, %d4:%d1   " );                \
    __asm__( "addxl  %d3, %d1       " );                \
    __asm__( "addxl  %d0, %d4       " );                \
    __asm__( "addl   %d1, %a3@+     " );                \
    __asm__( "movel  %a2@+, %d1     " );                \
    __asm__( "mulul  %d2, %d3:%d1   " );                \
    __asm__( "addxl  %d4, %d1       " );                \
    __asm__( "addxl  %d0, %d3       " );                \
    __asm__( "addl   %d1, %a3@+     " );                \
    __asm__( "movel  %a2@+, %d1     " );                \
    __asm__( "mulul  %d2, %d4:%d1   " );                \
    __asm__( "addxl  %d3, %d1       " );                \
    __asm__( "addxl  %d0, %d4       " );                \
    __asm__( "addl   %d1, %a3@+     " );                \
    __asm__( "movel  %a2@+, %d1     " );                \
    __asm__( "mulul  %d2, %d3:%d1   " );                \
    __asm__( "addxl  %d4, %d1       " );                \
    __asm__( "addxl  %d0, %d3       " );                \
    __asm__( "addl   %d1, %a3@+     " );                \
    __asm__( "movel  %a2@+, %d1     " );                \
    __asm__( "mulul  %d2, %d4:%d1   " );                \
    __asm__( "addxl  %d3, %d1       " );                \
    __asm__( "addxl  %d0, %d4       " );                \
    __asm__( "addl   %d1, %a3@+     " );                \
    __asm__( "movel  %a2@+, %d1     " );                \
    __asm__( "mulul  %d2, %d3:%d1   " );                \
    __asm__( "addxl  %d4, %d1       " );                \
    __asm__( "addxl  %d0, %d3       " );                \
    __asm__( "addl   %d1, %a3@+     " );                \
    __asm__( "movel  %a2@+, %d1     " );                \
    __asm__( "mulul  %d2, %d4:%d1   " );                \
    __asm__( "addxl  %d3, %d1       " );                \
    __asm__( "addxl  %d0, %d4       " );                \
    __asm__( "addl   %d1, %a3@+     " );                \
    __asm__( "movel  %a2@+, %d1     " );                \
    __asm__( "mulul  %d2, %d3:%d1   " );                \
    __asm__( "addxl  %d4, %d1       " );                \
    __asm__( "addxl  %d0, %d3       " );                \
    __asm__( "addl   %d1, %a3@+     " );                \
    __asm__( "addxl  %d0, %d3       " );

#endif /* MC68000 */
275
/*
 * PowerPC (64-bit and 32-bit), GCC-style extended inline assembly.
 *
 * MULADDC_INIT, any number of MULADDC_CORE fragments and MULADDC_STOP
 * concatenate into ONE asm statement.  The expansion site must provide the
 * lvalues c, d, s and b.
 *
 * Four variants are selected below: 64-bit vs. 32-bit limbs (ld/std/mulld/
 * mulhdu vs. lwz/stw/mullw/mulhwu) and Apple/Mach-O vs. other assemblers
 * (bare rN register names vs. %%rN).
 *
 * Operand map: %0 / %5 = c, %1 / %4 = d, %2 / %3 = s, %6 = b.
 * Register map: r3 = s, r4 = d, r5 = running carry, r6 = b, r7-r9 scratch.
 * INIT pre-decrements both pointers by one limb because CORE uses the
 * update forms (ldu/stdu, lwzu/stwu) with a one-limb displacement; STOP
 * undoes that bias before storing the pointers back.
 *
 * __asm__ (not asm) keeps the header usable with strict -std=c99/c11.
 * Instructions are separated by "\n\t" because ';' is not a statement
 * separator in every assembler dialect.  "memory" declares that the limbs
 * behind s and d are accessed although only the pointers are operands.
 *
 * NOTE(review): the carry bit changed by addic/adde/addc/addze is not
 * named in the clobber list; confirm whether the toolchains in use need it.
 */
#if defined(__powerpc__)   || defined(__ppc__)
#if defined(__powerpc64__) || defined(__ppc64__)

#if defined(__MACH__) && defined(__APPLE__)

#define MULADDC_INIT                        \
    __asm__(                                \
        "ld     r3, %3              \n\t"   \
        "ld     r4, %4              \n\t"   \
        "ld     r5, %5              \n\t"   \
        "ld     r6, %6              \n\t"   \
        "addi   r3, r3, -8          \n\t"   \
        "addi   r4, r4, -8          \n\t"   \
        "addic  r5, r5,  0          \n\t"

#define MULADDC_CORE                        \
        "ldu    r7, 8(r3)           \n\t"   \
        "mulld  r8, r7, r6          \n\t"   \
        "mulhdu r9, r7, r6          \n\t"   \
        "adde   r8, r8, r5          \n\t"   \
        "ld     r7, 8(r4)           \n\t"   \
        "addze  r5, r9              \n\t"   \
        "addc   r8, r8, r7          \n\t"   \
        "stdu   r8, 8(r4)           \n\t"

#define MULADDC_STOP                        \
        "addze  r5, r5              \n\t"   \
        "addi   r4, r4, 8           \n\t"   \
        "addi   r3, r3, 8           \n\t"   \
        "std    r5, %0              \n\t"   \
        "std    r4, %1              \n\t"   \
        "std    r3, %2              \n\t"   \
        : "=m" (c), "=m" (d), "=m" (s)              \
        : "m" (s), "m" (d), "m" (c), "m" (b)        \
        : "r3", "r4", "r5", "r6", "r7", "r8", "r9", \
          "memory"                                  \
    );

#else /* !( __MACH__ && __APPLE__ ) */

#define MULADDC_INIT                        \
    __asm__(                                \
        "ld     %%r3, %3            \n\t"   \
        "ld     %%r4, %4            \n\t"   \
        "ld     %%r5, %5            \n\t"   \
        "ld     %%r6, %6            \n\t"   \
        "addi   %%r3, %%r3, -8      \n\t"   \
        "addi   %%r4, %%r4, -8      \n\t"   \
        "addic  %%r5, %%r5,  0      \n\t"

#define MULADDC_CORE                        \
        "ldu    %%r7, 8(%%r3)       \n\t"   \
        "mulld  %%r8, %%r7, %%r6    \n\t"   \
        "mulhdu %%r9, %%r7, %%r6    \n\t"   \
        "adde   %%r8, %%r8, %%r5    \n\t"   \
        "ld     %%r7, 8(%%r4)       \n\t"   \
        "addze  %%r5, %%r9          \n\t"   \
        "addc   %%r8, %%r8, %%r7    \n\t"   \
        "stdu   %%r8, 8(%%r4)       \n\t"

#define MULADDC_STOP                        \
        "addze  %%r5, %%r5          \n\t"   \
        "addi   %%r4, %%r4, 8       \n\t"   \
        "addi   %%r3, %%r3, 8       \n\t"   \
        "std    %%r5, %0            \n\t"   \
        "std    %%r4, %1            \n\t"   \
        "std    %%r3, %2            \n\t"   \
        : "=m" (c), "=m" (d), "=m" (s)              \
        : "m" (s), "m" (d), "m" (c), "m" (b)        \
        : "r3", "r4", "r5", "r6", "r7", "r8", "r9", \
          "memory"                                  \
    );

#endif /* __MACH__ && __APPLE__ */

#else /* PPC32 */

#if defined(__MACH__) && defined(__APPLE__)

#define MULADDC_INIT                        \
    __asm__(                                \
        "lwz    r3, %3              \n\t"   \
        "lwz    r4, %4              \n\t"   \
        "lwz    r5, %5              \n\t"   \
        "lwz    r6, %6              \n\t"   \
        "addi   r3, r3, -4          \n\t"   \
        "addi   r4, r4, -4          \n\t"   \
        "addic  r5, r5,  0          \n\t"

#define MULADDC_CORE                        \
        "lwzu   r7, 4(r3)           \n\t"   \
        "mullw  r8, r7, r6          \n\t"   \
        "mulhwu r9, r7, r6          \n\t"   \
        "adde   r8, r8, r5          \n\t"   \
        "lwz    r7, 4(r4)           \n\t"   \
        "addze  r5, r9              \n\t"   \
        "addc   r8, r8, r7          \n\t"   \
        "stwu   r8, 4(r4)           \n\t"

#define MULADDC_STOP                        \
        "addze  r5, r5              \n\t"   \
        "addi   r4, r4, 4           \n\t"   \
        "addi   r3, r3, 4           \n\t"   \
        "stw    r5, %0              \n\t"   \
        "stw    r4, %1              \n\t"   \
        "stw    r3, %2              \n\t"   \
        : "=m" (c), "=m" (d), "=m" (s)              \
        : "m" (s), "m" (d), "m" (c), "m" (b)        \
        : "r3", "r4", "r5", "r6", "r7", "r8", "r9", \
          "memory"                                  \
    );

#else /* !( __MACH__ && __APPLE__ ) */

#define MULADDC_INIT                        \
    __asm__(                                \
        "lwz    %%r3, %3            \n\t"   \
        "lwz    %%r4, %4            \n\t"   \
        "lwz    %%r5, %5            \n\t"   \
        "lwz    %%r6, %6            \n\t"   \
        "addi   %%r3, %%r3, -4      \n\t"   \
        "addi   %%r4, %%r4, -4      \n\t"   \
        "addic  %%r5, %%r5,  0      \n\t"

#define MULADDC_CORE                        \
        "lwzu   %%r7, 4(%%r3)       \n\t"   \
        "mullw  %%r8, %%r7, %%r6    \n\t"   \
        "mulhwu %%r9, %%r7, %%r6    \n\t"   \
        "adde   %%r8, %%r8, %%r5    \n\t"   \
        "lwz    %%r7, 4(%%r4)       \n\t"   \
        "addze  %%r5, %%r9          \n\t"   \
        "addc   %%r8, %%r8, %%r7    \n\t"   \
        "stwu   %%r8, 4(%%r4)       \n\t"

#define MULADDC_STOP                        \
        "addze  %%r5, %%r5          \n\t"   \
        "addi   %%r4, %%r4, 4       \n\t"   \
        "addi   %%r3, %%r3, 4       \n\t"   \
        "stw    %%r5, %0            \n\t"   \
        "stw    %%r4, %1            \n\t"   \
        "stw    %%r3, %2            \n\t"   \
        : "=m" (c), "=m" (d), "=m" (s)              \
        : "m" (s), "m" (d), "m" (c), "m" (b)        \
        : "r3", "r4", "r5", "r6", "r7", "r8", "r9", \
          "memory"                                  \
    );

#endif /* __MACH__ && __APPLE__ */

#endif /* PPC64 / PPC32 */
#endif /* PowerPC */
448
/*
 * SPARC with 64-bit pointers, GCC-style extended inline assembly.
 *
 * MULADDC_INIT, any number of MULADDC_CORE fragments and MULADDC_STOP
 * concatenate into ONE asm statement.  The expansion site must provide the
 * lvalues c, d, s and b.
 *
 * Pointers are moved with 64-bit ldx/stx; the limbs, carry and multiplier
 * are moved with 32-bit ld/st and the pointers advance by 4 per limb.
 *
 * Operand map: %0 / %5 = c, %1 / %4 = d, %2 / %3 = s, %6 = b.
 * Register map: o0 = s, o1 = d, o2 = running carry, o3 = b,
 * o4/o5/g1 scratch (g1 receives the high product word from %y).
 *
 * __asm__ (not asm) keeps the header usable with strict -std=c99/c11;
 * "\n\t" separates instructions portably; "cc" covers the condition codes
 * set by addcc and "memory" the limbs accessed through s and d.
 */
#if defined(__sparc__) && defined(__sparc64__)

#define MULADDC_INIT                                    \
    __asm__(                                            \
        "ldx    %3, %%o0                        \n\t"   \
        "ldx    %4, %%o1                        \n\t"   \
        "ld     %5, %%o2                        \n\t"   \
        "ld     %6, %%o3                        \n\t"

#define MULADDC_CORE                                    \
        "ld     [%%o0], %%o4                    \n\t"   \
        "inc    4, %%o0                         \n\t"   \
        "ld     [%%o1], %%o5                    \n\t"   \
        "umul   %%o3, %%o4, %%o4                \n\t"   \
        "addcc  %%o4, %%o2, %%o4                \n\t"   \
        "rd     %%y, %%g1                       \n\t"   \
        "addx   %%g1, 0, %%g1                   \n\t"   \
        "addcc  %%o4, %%o5, %%o4                \n\t"   \
        "st     %%o4, [%%o1]                    \n\t"   \
        "addx   %%g1, 0, %%o2                   \n\t"   \
        "inc    4, %%o1                         \n\t"

#define MULADDC_STOP                                    \
        "st     %%o2, %0                        \n\t"   \
        "stx    %%o1, %1                        \n\t"   \
        "stx    %%o0, %2                        \n\t"   \
        : "=m" (c), "=m" (d), "=m" (s)          \
        : "m" (s), "m" (d), "m" (c), "m" (b)    \
        : "g1", "o0", "o1", "o2", "o3", "o4",   \
          "o5", "cc", "memory"                  \
    );
#endif /* SPARCv9 */
487
/*
 * SPARC with 32-bit pointers, GCC-style extended inline assembly.
 *
 * MULADDC_INIT, any number of MULADDC_CORE fragments and MULADDC_STOP
 * concatenate into ONE asm statement.  The expansion site must provide the
 * lvalues c, d, s and b.  Everything is moved with 32-bit ld/st.
 *
 * Operand map: %0 / %5 = c, %1 / %4 = d, %2 / %3 = s, %6 = b.
 * Register map: o0 = s, o1 = d, o2 = running carry, o3 = b,
 * o4/o5/g1 scratch (g1 receives the high product word from %y).
 *
 * __asm__ (not asm) keeps the header usable with strict -std=c99/c11;
 * "\n\t" separates instructions portably; "cc" covers the condition codes
 * set by addcc and "memory" the limbs accessed through s and d.
 */
#if defined(__sparc__) && !defined(__sparc64__)

#define MULADDC_INIT                                    \
    __asm__(                                            \
        "ld     %3, %%o0                        \n\t"   \
        "ld     %4, %%o1                        \n\t"   \
        "ld     %5, %%o2                        \n\t"   \
        "ld     %6, %%o3                        \n\t"

#define MULADDC_CORE                                    \
        "ld     [%%o0], %%o4                    \n\t"   \
        "inc    4, %%o0                         \n\t"   \
        "ld     [%%o1], %%o5                    \n\t"   \
        "umul   %%o3, %%o4, %%o4                \n\t"   \
        "addcc  %%o4, %%o2, %%o4                \n\t"   \
        "rd     %%y, %%g1                       \n\t"   \
        "addx   %%g1, 0, %%g1                   \n\t"   \
        "addcc  %%o4, %%o5, %%o4                \n\t"   \
        "st     %%o4, [%%o1]                    \n\t"   \
        "addx   %%g1, 0, %%o2                   \n\t"   \
        "inc    4, %%o1                         \n\t"

#define MULADDC_STOP                                    \
        "st     %%o2, %0                        \n\t"   \
        "st     %%o1, %1                        \n\t"   \
        "st     %%o0, %2                        \n\t"   \
        : "=m" (c), "=m" (d), "=m" (s)          \
        : "m" (s), "m" (d), "m" (c), "m" (b)    \
        : "g1", "o0", "o1", "o2", "o3", "o4",   \
          "o5", "cc", "memory"                  \
    );

#endif /* SPARCv8 */
527
/*
 * MicroBlaze, GCC inline assembly.
 *
 * Every instruction is its own asm statement; register contents are
 * expected to survive from one statement to the next.  The expansion site
 * must provide the lvalues s, d, c and b.
 *
 * Register map (per MULADDC_INIT): r3 = s, r4 = d, r5 = running carry,
 * r7 = low 16 bits of b, r6 = high 16 bits of b.  CORE reads the source
 * limb as two 16-bit halves and combines four 16x16 partial products.
 * The clobber list is attached to the last statement of MULADDC_STOP only.
 *
 * __asm__ is used instead of asm so the header also builds with strict
 * -std=c99/c11.
 *
 * NOTE(review): CORE loads the half-word at offset 0 into r8 and the one at
 * offset 2 into r9, then treats r9 as the low half; that matches a
 * little-endian limb layout - confirm for the targets in use.
 */
#if defined(__microblaze__) || defined(microblaze)

#define MULADDC_INIT                                    \
    __asm__( "lwi   r3,   %0        " :: "m" (s));      \
    __asm__( "lwi   r4,   %0        " :: "m" (d));      \
    __asm__( "lwi   r5,   %0        " :: "m" (c));      \
    __asm__( "lwi   r6,   %0        " :: "m" (b));      \
    __asm__( "andi  r7,   r6, 0xffff" );                \
    __asm__( "bsrli r6,   r6, 16    " );

#define MULADDC_CORE                                    \
    __asm__( "lhui  r8,   r3,   0   " );                \
    __asm__( "addi  r3,   r3,   2   " );                \
    __asm__( "lhui  r9,   r3,   0   " );                \
    __asm__( "addi  r3,   r3,   2   " );                \
    __asm__( "mul   r10,  r9,  r6   " );                \
    __asm__( "mul   r11,  r8,  r7   " );                \
    __asm__( "mul   r12,  r9,  r7   " );                \
    __asm__( "mul   r13,  r8,  r6   " );                \
    __asm__( "bsrli  r8, r10,  16   " );                \
    __asm__( "bsrli  r9, r11,  16   " );                \
    __asm__( "add   r13, r13,  r8   " );                \
    __asm__( "add   r13, r13,  r9   " );                \
    __asm__( "bslli r10, r10,  16   " );                \
    __asm__( "bslli r11, r11,  16   " );                \
    __asm__( "add   r12, r12, r10   " );                \
    __asm__( "addc  r13, r13,  r0   " );                \
    __asm__( "add   r12, r12, r11   " );                \
    __asm__( "addc  r13, r13,  r0   " );                \
    __asm__( "lwi   r10,  r4,   0   " );                \
    __asm__( "add   r12, r12, r10   " );                \
    __asm__( "addc  r13, r13,  r0   " );                \
    __asm__( "add   r12, r12,  r5   " );                \
    __asm__( "addc   r5, r13,  r0   " );                \
    __asm__( "swi   r12,  r4,   0   " );                \
    __asm__( "addi   r4,  r4,   4   " );

#define MULADDC_STOP                                    \
    __asm__( "swi   r5,   %0        " : "=m" (c));      \
    __asm__( "swi   r4,   %0        " : "=m" (d));      \
    __asm__( "swi   r3,   %0        " : "=m" (s) ::     \
     "r3", "r4" , "r5" , "r6" , "r7" , "r8" ,           \
     "r9", "r10", "r11", "r12", "r13" );

#endif /* MicroBlaze */
573
/*
 * TriCore, GCC inline assembly.
 *
 * Every instruction is its own asm statement; register contents are
 * expected to survive from one statement to the next.  The expansion site
 * must provide the lvalues s, d, c and b.
 *
 * Register map (per MULADDC_INIT): a2 = s, a3 = d, d4 = running carry,
 * d1 = b, d5 = 0.  CORE accumulates into e2 and copies d3 to d4 as the
 * next carry.
 *
 * d5 is written by INIT, so it is listed as clobbered alongside d4.
 * __asm__ is used instead of asm so the header also builds with strict
 * -std=c99/c11.
 */
#if defined(__tricore__)

#define MULADDC_INIT                                    \
    __asm__( "ld.a   %%a2, %0       " :: "m" (s));      \
    __asm__( "ld.a   %%a3, %0       " :: "m" (d));      \
    __asm__( "ld.w   %%d4, %0       " :: "m" (c));      \
    __asm__( "ld.w   %%d1, %0       " :: "m" (b));      \
    __asm__( "xor    %d5, %d5       " );

#define MULADDC_CORE                                    \
    __asm__( "ld.w   %d0,   [%a2+]      " );            \
    __asm__( "madd.u %e2, %e4, %d0, %d1 " );            \
    __asm__( "ld.w   %d0,   [%a3]       " );            \
    __asm__( "addx   %d2,    %d2,  %d0  " );            \
    __asm__( "addc   %d3,    %d3,    0  " );            \
    __asm__( "mov    %d4,    %d3        " );            \
    __asm__( "st.w  [%a3+],  %d2        " );

#define MULADDC_STOP                                    \
    __asm__( "st.w   %0, %%d4       " : "=m" (c));      \
    __asm__( "st.a   %0, %%a3       " : "=m" (d));      \
    __asm__( "st.a   %0, %%a2       " : "=m" (s) ::     \
    "d0", "d1", "e2", "d4", "d5", "a2", "a3" );

#endif /* TriCore */
599
/*
 * ARM, GCC-style extended inline assembly.
 *
 * MULADDC_INIT, any number of MULADDC_CORE fragments and MULADDC_STOP
 * concatenate into ONE asm statement.  The expansion site must provide the
 * lvalues c, d, s and b.
 *
 * Operand map: %0 / %5 = c, %1 / %4 = d, %2 / %3 = s, %6 = b.
 * Register map: r0 = s, r1 = d, r2 = running carry, r3 = b.
 *
 * Two variants:
 *   - Thumb (not Thumb-2): b is split into 16-bit halves kept in r8 (low)
 *     and r9 (high); CORE builds the product from four 16x16 multiplies.
 *   - otherwise: CORE uses umlal for the full 32x32 multiply-accumulate.
 *
 * __asm__ (not asm) keeps the header usable with strict -std=c99/c11;
 * "\n\t" separates instructions portably; "memory" declares that the limbs
 * behind s and d are accessed although only the pointers are operands.
 */
#if defined(__arm__)

#if defined(__thumb__) && !defined(__thumb2__)

#define MULADDC_INIT                                    \
    __asm__(                                            \
            "ldr    r0, %3                      \n\t"   \
            "ldr    r1, %4                      \n\t"   \
            "ldr    r2, %5                      \n\t"   \
            "ldr    r3, %6                      \n\t"   \
            "lsr    r7, r3, #16                 \n\t"   \
            "mov    r9, r7                      \n\t"   \
            "lsl    r7, r3, #16                 \n\t"   \
            "lsr    r7, r7, #16                 \n\t"   \
            "mov    r8, r7                      \n\t"

#define MULADDC_CORE                                    \
            "ldmia  r0!, {r6}                   \n\t"   \
            "lsr    r7, r6, #16                 \n\t"   \
            "lsl    r6, r6, #16                 \n\t"   \
            "lsr    r6, r6, #16                 \n\t"   \
            "mov    r4, r8                      \n\t"   \
            "mul    r4, r6                      \n\t"   \
            "mov    r3, r9                      \n\t"   \
            "mul    r6, r3                      \n\t"   \
            "mov    r5, r9                      \n\t"   \
            "mul    r5, r7                      \n\t"   \
            "mov    r3, r8                      \n\t"   \
            "mul    r7, r3                      \n\t"   \
            "lsr    r3, r6, #16                 \n\t"   \
            "add    r5, r5, r3                  \n\t"   \
            "lsr    r3, r7, #16                 \n\t"   \
            "add    r5, r5, r3                  \n\t"   \
            "add    r4, r4, r2                  \n\t"   \
            "mov    r2, #0                      \n\t"   \
            "adc    r5, r2                      \n\t"   \
            "lsl    r3, r6, #16                 \n\t"   \
            "add    r4, r4, r3                  \n\t"   \
            "adc    r5, r2                      \n\t"   \
            "lsl    r3, r7, #16                 \n\t"   \
            "add    r4, r4, r3                  \n\t"   \
            "adc    r5, r2                      \n\t"   \
            "ldr    r3, [r1]                    \n\t"   \
            "add    r4, r4, r3                  \n\t"   \
            "adc    r2, r5                      \n\t"   \
            "stmia  r1!, {r4}                   \n\t"

#define MULADDC_STOP                                    \
            "str    r2, %0                      \n\t"   \
            "str    r1, %1                      \n\t"   \
            "str    r0, %2                      \n\t"   \
         : "=m" (c),  "=m" (d), "=m" (s)        \
         : "m" (s), "m" (d), "m" (c), "m" (b)   \
         : "r0", "r1", "r2", "r3", "r4", "r5",  \
           "r6", "r7", "r8", "r9", "cc",        \
           "memory"                             \
         );

#else

#define MULADDC_INIT                                    \
    __asm__(                                            \
            "ldr    r0, %3                      \n\t"   \
            "ldr    r1, %4                      \n\t"   \
            "ldr    r2, %5                      \n\t"   \
            "ldr    r3, %6                      \n\t"

#define MULADDC_CORE                                    \
            "ldr    r4, [r0], #4                \n\t"   \
            "mov    r5, #0                      \n\t"   \
            "ldr    r6, [r1]                    \n\t"   \
            "umlal  r2, r5, r3, r4              \n\t"   \
            "adds   r7, r6, r2                  \n\t"   \
            "adc    r2, r5, #0                  \n\t"   \
            "str    r7, [r1], #4                \n\t"

#define MULADDC_STOP                                    \
            "str    r2, %0                      \n\t"   \
            "str    r1, %1                      \n\t"   \
            "str    r0, %2                      \n\t"   \
         : "=m" (c),  "=m" (d), "=m" (s)        \
         : "m" (s), "m" (d), "m" (c), "m" (b)   \
         : "r0", "r1", "r2", "r3", "r4", "r5",  \
           "r6", "r7", "cc", "memory"           \
         );

#endif /* Thumb */

#endif /* ARMv3 */
700
/*
 * Alpha, GCC inline assembly.
 *
 * Every instruction is its own asm statement; register contents are
 * expected to survive from one statement to the next.  The expansion site
 * must provide the lvalues s, d, c and b.
 *
 * Register map (per MULADDC_INIT): $1 = s, $2 = d, $3 = running carry,
 * $4 = b.  CORE forms the 128-bit product with mulq (low) and umulh (high)
 * and recovers the two carry-outs with cmpult.  The clobber list is
 * attached to the last statement of MULADDC_STOP only.
 *
 * __asm__ is used instead of asm so the header also builds with strict
 * -std=c99/c11.
 */
#if defined(__alpha__)

#define MULADDC_INIT                                    \
    __asm__( "ldq    $1, %0         " :: "m" (s));      \
    __asm__( "ldq    $2, %0         " :: "m" (d));      \
    __asm__( "ldq    $3, %0         " :: "m" (c));      \
    __asm__( "ldq    $4, %0         " :: "m" (b));

#define MULADDC_CORE                                    \
    __asm__( "ldq    $6,  0($1)     " );                \
    __asm__( "addq   $1,  8, $1     " );                \
    __asm__( "mulq   $6, $4, $7     " );                \
    __asm__( "umulh  $6, $4, $6     " );                \
    __asm__( "addq   $7, $3, $7     " );                \
    __asm__( "cmpult $7, $3, $3     " );                \
    __asm__( "ldq    $5,  0($2)     " );                \
    __asm__( "addq   $7, $5, $7     " );                \
    __asm__( "cmpult $7, $5, $5     " );                \
    __asm__( "stq    $7,  0($2)     " );                \
    __asm__( "addq   $2,  8, $2     " );                \
    __asm__( "addq   $6, $3, $3     " );                \
    __asm__( "addq   $5, $3, $3     " );

#define MULADDC_STOP                                    \
    __asm__( "stq    $3, %0         " : "=m" (c));      \
    __asm__( "stq    $2, %0         " : "=m" (d));      \
    __asm__( "stq    $1, %0         " : "=m" (s) ::     \
    "$1", "$2", "$3", "$4", "$5", "$6", "$7" );

#endif /* Alpha */
731
/*
 * MIPS32, GCC-style extended inline assembly.
 *
 * MULADDC_INIT, any number of MULADDC_CORE fragments and MULADDC_STOP
 * concatenate into ONE asm statement.  The expansion site must provide the
 * lvalues c, d, s and b.
 *
 * The code moves the pointers s and d with 32-bit lw/sw, so it is only
 * enabled when __mips64 is not defined.
 *
 * Operand map: %0 / %5 = c, %1 / %4 = d, %2 / %3 = s, %6 = b.
 * Register map: $10 = s, $11 = d, $12 = running carry, $13 = b,
 * $9/$14/$15 scratch.  The carry-outs of the two additions are recovered
 * with sltu.
 *
 * Pointer bumps use addiu: addi raises an overflow exception on signed
 * wrap-around, which is not wanted for address arithmetic.  multu writes
 * HI and LO, so both are listed as clobbered.  __asm__ (not asm) keeps the
 * header usable with strict -std=c99/c11; "\n\t" separates instructions
 * portably; "memory" covers the limbs accessed through s and d.
 */
#if defined(__mips__) && !defined(__mips64)

#define MULADDC_INIT                        \
    __asm__(                                \
        "lw     $10, %3             \n\t"   \
        "lw     $11, %4             \n\t"   \
        "lw     $12, %5             \n\t"   \
        "lw     $13, %6             \n\t"

#define MULADDC_CORE                        \
        "lw     $14, 0($10)         \n\t"   \
        "multu  $13, $14            \n\t"   \
        "addiu  $10, $10, 4         \n\t"   \
        "mflo   $14                 \n\t"   \
        "mfhi   $9                  \n\t"   \
        "addu   $14, $12, $14       \n\t"   \
        "lw     $15, 0($11)         \n\t"   \
        "sltu   $12, $14, $12       \n\t"   \
        "addu   $15, $14, $15       \n\t"   \
        "sltu   $14, $15, $14       \n\t"   \
        "addu   $12, $12, $9        \n\t"   \
        "sw     $15, 0($11)         \n\t"   \
        "addu   $12, $12, $14       \n\t"   \
        "addiu  $11, $11, 4         \n\t"

#define MULADDC_STOP                        \
        "sw     $12, %0             \n\t"   \
        "sw     $11, %1             \n\t"   \
        "sw     $10, %2             \n\t"   \
        : "=m" (c), "=m" (d), "=m" (s)                      \
        : "m" (s), "m" (d), "m" (c), "m" (b)                \
        : "$9", "$10", "$11", "$12", "$13", "$14", "$15",   \
          "lo", "hi", "memory"                              \
    );

#endif /* MIPS */
773#endif /* GNUC */
774
/*
 * IA-32 for Microsoft-style inline assembly (MSVC targeting x86, Watcom).
 *
 * Each macro expands to a run of "__asm <instruction>" statements.  The
 * expansion site must provide the lvalues s, d, c and b.
 *
 * Register map (per MULADDC_INIT): esi = s, edi = d, ecx = running carry,
 * ebx = b.
 *
 * With POLARSSL_HAVE_SSE2, MULADDC_HUIT is emitted as raw opcode bytes via
 * _emit, and MULADDC_STOP starts with the bytes 0x0F 0x77 (the EMMS opcode).
 * NOTE(review): the byte table is presumably the machine-code form of the
 * eight-limb MMX/SSE2 sequence used by the GCC i386 port; it has not been
 * decoded here - verify before editing.
 *
 * Neither MULADDC_STOP definition ends in a line continuation any more, so
 * the macro no longer swallows whatever line happens to follow it.
 */
#if (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)

#define MULADDC_INIT                            \
    __asm   mov     esi, s                      \
    __asm   mov     edi, d                      \
    __asm   mov     ecx, c                      \
    __asm   mov     ebx, b

/* One limb: edx:eax = *s++ * b; add carry and *d; store low word to *d++. */
#define MULADDC_CORE                            \
    __asm   lodsd                               \
    __asm   mul     ebx                         \
    __asm   add     eax, ecx                    \
    __asm   adc     edx, 0                      \
    __asm   add     eax, [edi]                  \
    __asm   adc     edx, 0                      \
    __asm   mov     ecx, edx                    \
    __asm   stosd

#if defined(POLARSSL_HAVE_SSE2)

#define EMIT __asm _emit

#define MULADDC_HUIT                            \
    EMIT 0x0F  EMIT 0x6E  EMIT 0xC9             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0xC3             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x1F             \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xCB             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x16             \
    EMIT 0x0F  EMIT 0xF4  EMIT 0xD0             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x66  EMIT 0x04  \
    EMIT 0x0F  EMIT 0xF4  EMIT 0xE0             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x76  EMIT 0x08  \
    EMIT 0x0F  EMIT 0xF4  EMIT 0xF0             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x7E  EMIT 0x0C  \
    EMIT 0x0F  EMIT 0xF4  EMIT 0xF8             \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xCA             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x5F  EMIT 0x04  \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xDC             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x6F  EMIT 0x08  \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xEE             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x67  EMIT 0x0C  \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xFC             \
    EMIT 0x0F  EMIT 0x7E  EMIT 0x0F             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x56  EMIT 0x10  \
    EMIT 0x0F  EMIT 0xF4  EMIT 0xD0             \
    EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x66  EMIT 0x14  \
    EMIT 0x0F  EMIT 0xF4  EMIT 0xE0             \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xCB             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x76  EMIT 0x18  \
    EMIT 0x0F  EMIT 0xF4  EMIT 0xF0             \
    EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x04  \
    EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x5E  EMIT 0x1C  \
    EMIT 0x0F  EMIT 0xF4  EMIT 0xD8             \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xCD             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x6F  EMIT 0x10  \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xD5             \
    EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x08  \
    EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xCF             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x6F  EMIT 0x14  \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xE5             \
    EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x0C  \
    EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xCA             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x6F  EMIT 0x18  \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xF5             \
    EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x10  \
    EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xCC             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x6F  EMIT 0x1C  \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xDD             \
    EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x14  \
    EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xCE             \
    EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x18  \
    EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xCB             \
    EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x1C  \
    EMIT 0x83  EMIT 0xC7  EMIT 0x20             \
    EMIT 0x83  EMIT 0xC6  EMIT 0x20             \
    EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
    EMIT 0x0F  EMIT 0x7E  EMIT 0xC9

/* SSE2 build: emit EMMS, then write back c, d, s. */
#define MULADDC_STOP                            \
    EMIT 0x0F  EMIT 0x77                        \
    __asm   mov     c, ecx                      \
    __asm   mov     d, edi                      \
    __asm   mov     s, esi

#else

/* Non-SSE2 build: write back c, d, s. */
#define MULADDC_STOP                            \
    __asm   mov     c, ecx                      \
    __asm   mov     d, edi                      \
    __asm   mov     s, esi

#endif /* SSE2 */
#endif /* MSVC */
875
Paul Bakker40e46942009-01-03 21:51:57 +0000876#endif /* POLARSSL_HAVE_ASM */
Paul Bakker5121ce52009-01-03 21:22:43 +0000877
/*
 * Portable C fallback, used when no assembly variant defined MULADDC_CORE.
 *
 * MULADDC_INIT opens a brace-delimited scope and declares the temporaries,
 * each MULADDC_CORE processes one limb, and MULADDC_STOP closes the scope.
 * The expansion site must provide s and d (limb pointers), c (carry, read
 * and updated) and b (multiplier), plus the names t_uint, biH and - for the
 * double-width variant - t_udbl and biL.
 *
 * Per limb: (c, *d) = *s * b + *d + c; then s and d advance by one limb.
 *
 *   - POLARSSL_HAVE_UDBL: the product is formed in the double-width type
 *     t_udbl and split with a shift by biL; the narrowing back to t_uint is
 *     spelled out with casts.
 *   - otherwise: both operands are split at biH bits and the product is
 *     assembled from four half-width multiplies, recovering each carry-out
 *     with an unsigned "sum < addend" comparison.
 */
#if !defined(MULADDC_CORE)
#if defined(POLARSSL_HAVE_UDBL)

#define MULADDC_INIT                    \
{                                       \
    t_udbl r;                           \
    t_uint r0, r1;

#define MULADDC_CORE                    \
    r   = *(s++) * (t_udbl) b;          \
    r0  = (t_uint) r;                   \
    r1  = (t_uint)( r >> biL );         \
    r0 += c;  r1 += (r0 <  c);          \
    r0 += *d; r1 += (r0 < *d);          \
    c = r1; *(d++) = r0;

#define MULADDC_STOP                    \
}

#else
#define MULADDC_INIT                    \
{                                       \
    t_uint s0, s1, b0, b1;              \
    t_uint r0, r1, rx, ry;              \
    b0 = ( b << biH ) >> biH;           \
    b1 = ( b >> biH );

#define MULADDC_CORE                    \
    s0 = ( *s << biH ) >> biH;          \
    s1 = ( *s >> biH ); s++;            \
    rx = s0 * b1; r0 = s0 * b0;         \
    ry = s1 * b0; r1 = s1 * b1;         \
    r1 += ( rx >> biH );                \
    r1 += ( ry >> biH );                \
    rx <<= biH; ry <<= biH;             \
    r0 += rx; r1 += (r0 < rx);          \
    r0 += ry; r1 += (r0 < ry);          \
    r0 +=  c; r1 += (r0 <  c);          \
    r0 += *d; r1 += (r0 < *d);          \
    c = r1; *(d++) = r0;

#define MULADDC_STOP                    \
}

#endif /* POLARSSL_HAVE_UDBL (C, longlong) vs. C, generic */
#endif /* !MULADDC_CORE */
924
925#endif /* bn_mul.h */