David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0-only |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 2 | /* |
| 3 | * linux/arch/arm/lib/xor-neon.c |
| 4 | * |
| 5 | * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org> |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 6 | */ |
| 7 | |
| 8 | #include <linux/raid/xor.h> |
| 9 | #include <linux/module.h> |
| 10 | |
| 11 | MODULE_LICENSE("GPL"); |
| 12 | |
| 13 | #ifndef __ARM_NEON__ |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 14 | #error You should compile this file with '-march=armv7-a -mfloat-abi=softfp -mfpu=neon' |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 15 | #endif |
| 16 | |
| 17 | /* |
| 18 | * Pull in the reference implementations while instructing GCC (through |
| 19 | * -ftree-vectorize) to attempt to exploit implicit parallelism and emit |
| 20 | * NEON instructions. |
| 21 | */ |
| 22 | #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) |
| 23 | #pragma GCC optimize "tree-vectorize" |
| 24 | #else |
| 25 | /* |
| 26 | * While older versions of GCC do not generate incorrect code, they fail to |
| 27 | * recognize the parallel nature of these functions, and emit plain ARM code, |
| 28 | * which is known to be slower than the optimized ARM code in asm-arm/xor.h. |
| 29 | */ |
| 30 | #warning This code requires at least version 4.6 of GCC |
| 31 | #endif |
| 32 | |
| 33 | #pragma GCC diagnostic ignored "-Wunused-variable" |
| 34 | #include <asm-generic/xor.h> |
| 35 | |
| 36 | struct xor_block_template const xor_block_neon_inner = { |
| 37 | .name = "__inner_neon__", |
| 38 | .do_2 = xor_8regs_2, |
| 39 | .do_3 = xor_8regs_3, |
| 40 | .do_4 = xor_8regs_4, |
| 41 | .do_5 = xor_8regs_5, |
| 42 | }; |
| 43 | EXPORT_SYMBOL(xor_block_neon_inner); |