about | summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author: Andre Przywara <andre.przywara@arm.com> 2020-09-03 11:04:39 +0100
committer: Andre Przywara <andre.przywara@arm.com> 2020-09-03 23:00:24 +0100
commit: 75fab6496e5fce9a11b4e3a160ad2e797acc6ee9 (patch)
tree: 7563954537e622bbced5d9bf38fdb5f7a182101a /lib
parent: a41ca4c3449c51822d318e295b21d452efac2848 (diff)
download: trusted-firmware-a-75fab6496e5fce9a11b4e3a160ad2e797acc6ee9.tar.gz
libc: memset: improve performance by avoiding single byte writes
Currently our memset() implementation is safe, but slow. The main reason for that seems to be the single byte writes that it issues, which can show horrible performance, depending on the implementation of the load/store subsystem.

Improve the algorithm by trying to issue 64-bit writes. As this only works with aligned pointers, have a head and a tail section which covers unaligned pointers, and leave the bulk of the work to the middle section that does use 64-bit writes.

Put through some unit tests, which exercise all combinations of nasty input parameters (pointers with various alignments, various odd and even sizes, corner cases of content to write (-1, 256)).

Change-Id: I28ddd3d388cc4989030f1a70447581985368d5bb
Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Diffstat (limited to 'lib')
-rw-r--r-- lib/libc/memset.c | 34
1 files changed, 32 insertions, 2 deletions
diff --git a/lib/libc/memset.c b/lib/libc/memset.c
index d8007d8e94..f9dd4c5dbe 100644
--- a/lib/libc/memset.c
+++ b/lib/libc/memset.c
@@ -1,18 +1,48 @@
/*
- * Copyright (c) 2013-2019, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2020, ARM Limited and Contributors. All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include <stddef.h>
#include <string.h>
+#include <stdint.h>
void *memset(void *dst, int val, size_t count)
{
char *ptr = dst;
+ uint64_t *ptr64;
+ uint64_t fill = (unsigned char)val;
- while (count--)
+ /* Simplify code below by making sure we write at least one byte. */
+ if (count == 0) {
+ return dst;
+ }
+
+ /* Handle the first part, until the pointer becomes 64-bit aligned. */
+ while (((uintptr_t)ptr & 7)) {
+ *ptr++ = val;
+ if (--count == 0) {
+ return dst;
+ }
+ }
+
+ /* Duplicate the fill byte to the rest of the 64-bit word. */
+ fill |= fill << 8;
+ fill |= fill << 16;
+ fill |= fill << 32;
+
+ /* Use 64-bit writes for as long as possible. */
+ ptr64 = (void *)ptr;
+ for (; count >= 8; count -= 8) {
+ *ptr64++ = fill;
+ }
+
+ /* Handle the remaining part byte-per-byte. */
+ ptr = (void *)ptr64;
+ while (count--) {
*ptr++ = val;
+ }
return dst;
}