/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright 2002 Andi Kleen */

#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
#include <asm/mcsafe_test.h>
#include <asm/alternative-asm.h>
#include <asm/export.h>

/*
 * We build a jump to memcpy_orig by default, which gets NOPped out on
 * the majority of x86 CPUs (those that set REP_GOOD). On CPUs that also
 * have the enhanced REP MOVSB/STOSB feature (ERMS), those NOPs are
 * instead changed to a jmp to memcpy_erms, which does the copy with a
 * single REP MOVSB.
 */
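/*
 * A rough C sketch of what the run-time patching above amounts to. It is
 * illustrative only: cpu_has_erms()/cpu_has_rep_good() are hypothetical
 * helpers standing in for the feature flags, and the real kernel patches
 * the instructions once at boot rather than branching on every call.
 *
 *	void *do_memcpy(void *dst, const void *src, size_t len)
 *	{
 *		if (cpu_has_erms())
 *			return memcpy_erms(dst, src, len);	// REP MOVSB only
 *		if (cpu_has_rep_good())
 *			return rep_movsq_copy(dst, src, len);	// fast path below
 *		return memcpy_orig(dst, src, len);		// unrolled copy
 *	}
 */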

.weak memcpy

/*
 * memcpy - Copy a memory block.
 *
 * Input:
 *  rdi destination
 *  rsi source
 *  rdx count
 *
 * Output:
 * rax original destination
 */
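/*
 * The register assignment above is simply the x86-64 SysV calling
 * convention applied to the usual C prototype; as an illustration:
 *
 *	void *memcpy(void *dest, const void *src, size_t n);
 *		// dest -> %rdi, src -> %rsi, n -> %rdx
 *		// return value (the original dest) -> %rax
 */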
ENTRY(__memcpy)
ENTRY(memcpy)
	ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
		      "jmp memcpy_erms", X86_FEATURE_ERMS

	movq %rdi, %rax
	movq %rdx, %rcx
	shrq $3, %rcx
	andl $7, %edx
	rep movsq
	movl %edx, %ecx
	rep movsb
	ret
ENDPROC(memcpy)
ENDPROC(__memcpy)
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(__memcpy)
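/*
 * A hedged C sketch of the REP_GOOD fast path above: copy len/8 quadwords
 * (rep movsq), then the remaining 0-7 bytes one at a time (rep movsb).
 * The direct uint64_t pointer casts gloss over alignment/aliasing
 * formalities, which is fine for an x86-64 illustration:
 *
 *	void *rep_movsq_copy(void *dest, const void *src, size_t n)
 *	{
 *		unsigned char *d = dest;
 *		const unsigned char *s = src;
 *		size_t words = n >> 3;
 *
 *		while (words--) {			// rep movsq
 *			*(uint64_t *)d = *(const uint64_t *)s;
 *			d += 8; s += 8;
 *		}
 *		n &= 7;
 *		while (n--)				// rep movsb
 *			*d++ = *s++;
 *		return dest;
 *	}
 */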

/*
 * memcpy_erms() - enhanced fast string memcpy. This is faster and
 * simpler than memcpy. Use memcpy_erms when possible.
 */
ENTRY(memcpy_erms)
	movq %rdi, %rax
	movq %rdx, %rcx
	rep movsb
	ret
ENDPROC(memcpy_erms)

ENTRY(memcpy_orig)
	movq %rdi, %rax

	cmpq $0x20, %rdx
	jb .Lhandle_tail

	/*
	 * We check whether a memory false dependence could occur,
	 * then jump to the corresponding copy mode.
	 */
	cmp %dil, %sil
	jl .Lcopy_backward
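/*
 * A hedged sketch of the decision just made: only the low bytes of the
 * two pointers are compared (and jl treats them as signed), a cheap
 * heuristic for the source lying just below the destination, where a
 * forward copy can suffer store-to-load false dependences:
 *
 *	if ((signed char)((uintptr_t)src & 0xff) <
 *	    (signed char)((uintptr_t)dst & 0xff))
 *		copy_backward();	// start from the tail, below
 *	else
 *		copy_forward();		// start from the head
 */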
	subq $0x20, %rdx
.Lcopy_forward_loop:
	subq $0x20, %rdx

	/*
	 * Move in blocks of 4x8 bytes:
	 */
	movq 0*8(%rsi), %r8
	movq 1*8(%rsi), %r9
	movq 2*8(%rsi), %r10
	movq 3*8(%rsi), %r11
	leaq 4*8(%rsi), %rsi

	movq %r8, 0*8(%rdi)
	movq %r9, 1*8(%rdi)
	movq %r10, 2*8(%rdi)
	movq %r11, 3*8(%rdi)
	leaq 4*8(%rdi), %rdi
	jae .Lcopy_forward_loop
	addl $0x20, %edx
	jmp .Lhandle_tail
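/*
 * A minimal C sketch of the forward loop above. The asm pre-biases the
 * count and uses the borrow flag from subq as its loop condition, but the
 * effect is the same as this (unaligned 8-byte accesses are assumed to be
 * fine, as they are on x86-64):
 *
 *	while (len >= 32) {
 *		uint64_t a = *(const uint64_t *)(s +  0);
 *		uint64_t b = *(const uint64_t *)(s +  8);
 *		uint64_t c = *(const uint64_t *)(s + 16);
 *		uint64_t e = *(const uint64_t *)(s + 24);
 *		*(uint64_t *)(d +  0) = a;
 *		*(uint64_t *)(d +  8) = b;
 *		*(uint64_t *)(d + 16) = c;
 *		*(uint64_t *)(d + 24) = e;
 *		s += 32; d += 32; len -= 32;
 *	}
 *	// the remaining 0-31 bytes fall through to .Lhandle_tail
 */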

.Lcopy_backward:
	/*
	 * Advance source and destination to the tail.
	 */
	addq %rdx, %rsi
	addq %rdx, %rdi
	subq $0x20, %rdx
	/*
	 * At most 3 ALU operations in one cycle, so append NOPs in the
	 * same 16-byte chunk.
	 */
	.p2align 4
.Lcopy_backward_loop:
	subq $0x20, %rdx
	movq -1*8(%rsi), %r8
	movq -2*8(%rsi), %r9
	movq -3*8(%rsi), %r10
	movq -4*8(%rsi), %r11
	leaq -4*8(%rsi), %rsi
	movq %r8, -1*8(%rdi)
	movq %r9, -2*8(%rdi)
	movq %r10, -3*8(%rdi)
	movq %r11, -4*8(%rdi)
	leaq -4*8(%rdi), %rdi
	jae .Lcopy_backward_loop

	/*
	 * Rewind source and destination back to the head for the tail copy.
	 */
	addl $0x20, %edx
	subq %rdx, %rsi
	subq %rdx, %rdi
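/*
 * Hedged sketch of the backward pass just finished: both pointers are
 * moved to the end of the region, 32-byte blocks are copied from high to
 * low addresses, and the pointers are then rewound so the shared tail
 * code below finishes the leftover 0-31 bytes from the head:
 *
 *	s += len; d += len;
 *	while (len >= 32) {
 *		s -= 32; d -= 32; len -= 32;
 *		// copy bytes [s, s + 32) to [d, d + 32)
 *	}
 *	s -= len; d -= len;	// back to the start of the uncopied bytes
 */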
.Lhandle_tail:
	cmpl $16, %edx
	jb .Lless_16bytes

	/*
	 * Copy 16 to 31 bytes.
	 */
	movq 0*8(%rsi), %r8
	movq 1*8(%rsi), %r9
	movq -2*8(%rsi, %rdx), %r10
	movq -1*8(%rsi, %rdx), %r11
	movq %r8, 0*8(%rdi)
	movq %r9, 1*8(%rdi)
	movq %r10, -2*8(%rdi, %rdx)
	movq %r11, -1*8(%rdi, %rdx)
	retq
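/*
 * The 16..31 byte case above uses the classic overlapping-copy trick:
 * load the first 16 bytes and the last 16 bytes (they may overlap in the
 * middle) and store both, so no length-dependent loop is needed. A hedged
 * C sketch:
 *
 *	// 16 <= len <= 31
 *	uint64_t h0 = *(const uint64_t *)(s);
 *	uint64_t h1 = *(const uint64_t *)(s + 8);
 *	uint64_t t0 = *(const uint64_t *)(s + len - 16);
 *	uint64_t t1 = *(const uint64_t *)(s + len - 8);
 *	*(uint64_t *)(d)            = h0;
 *	*(uint64_t *)(d + 8)        = h1;
 *	*(uint64_t *)(d + len - 16) = t0;
 *	*(uint64_t *)(d + len - 8)  = t1;
 */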
	.p2align 4
.Lless_16bytes:
	cmpl $8, %edx
	jb .Lless_8bytes
	/*
	 * Copy 8 to 15 bytes.
	 */
	movq 0*8(%rsi), %r8
	movq -1*8(%rsi, %rdx), %r9
	movq %r8, 0*8(%rdi)
	movq %r9, -1*8(%rdi, %rdx)
	retq
	.p2align 4
.Lless_8bytes:
	cmpl $4, %edx
	jb .Lless_3bytes

	/*
	 * Copy 4 to 7 bytes.
	 */
	movl (%rsi), %ecx
	movl -4(%rsi, %rdx), %r8d
	movl %ecx, (%rdi)
	movl %r8d, -4(%rdi, %rdx)
	retq
	.p2align 4
.Lless_3bytes:
	subl $1, %edx
	jb .Lend
	/*
	 * Copy 1 to 3 bytes. The subl above set CF for a zero length
	 * (handled by the jb) and ZF for a length of exactly one byte;
	 * movzbl does not modify the flags, so the jz below still tests
	 * that result.
	 */
	movzbl (%rsi), %ecx
	jz .Lstore_1byte
	movzbq 1(%rsi), %r8
	movzbq (%rsi, %rdx), %r9
	movb %r8b, 1(%rdi)
	movb %r9b, (%rdi, %rdx)
.Lstore_1byte:
	movb %cl, (%rdi)

.Lend:
	retq
ENDPROC(memcpy_orig)

#ifndef CONFIG_UML

MCSAFE_TEST_CTL

/*
 * __memcpy_mcsafe - memory copy with machine check exception handling
 * Note that we only catch machine checks when reading the source addresses.
 * Writes to target are posted and don't generate machine checks.
 */
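/*
 * Call-level semantics, as a hedged sketch: this behaves like a memcpy
 * whose return value reports progress rather than the destination
 * pointer. The prototype is along the lines of:
 *
 *	// returns 0 on success, or the number of bytes NOT copied if a
 *	// machine check (on a read) or a fault (on a write) interrupted
 *	// the copy
 *	unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt);
 */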
ENTRY(__memcpy_mcsafe)
	cmpl $8, %edx
	/* Less than 8 bytes? Go to byte copy loop */
	jb .L_no_whole_words

	/* Check for bad alignment of source */
	testl $7, %esi
	/* Already aligned */
	jz .L_8byte_aligned

	/* Copy one byte at a time until source is 8-byte aligned */
	movl %esi, %ecx
	andl $7, %ecx
	subl $8, %ecx
	negl %ecx
	subl %ecx, %edx
.L_read_leading_bytes:
	movb (%rsi), %al
	MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
	MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
.L_write_leading_bytes:
	movb %al, (%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_read_leading_bytes
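/*
 * The leading-byte count computed above is just the distance from src to
 * the next 8-byte boundary; a hedged C sketch of the same arithmetic
 * (this path is only reached when src is known to be unaligned, so the
 * count is in 1..7):
 *
 *	unsigned long lead = 8 - ((uintptr_t)src & 7);
 *	len -= lead;
 *	while (lead--)
 *		*d++ = *s++;	// each access is covered by the exception table
 */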

.L_8byte_aligned:
	movl %edx, %ecx
	andl $7, %edx
	shrl $3, %ecx
	jz .L_no_whole_words

.L_read_words:
	movq (%rsi), %r8
	MCSAFE_TEST_SRC %rsi 8 .E_read_words
	MCSAFE_TEST_DST %rdi 8 .E_write_words
.L_write_words:
	movq %r8, (%rdi)
	addq $8, %rsi
	addq $8, %rdi
	decl %ecx
	jnz .L_read_words

	/* Any trailing bytes? */
.L_no_whole_words:
	andl %edx, %edx
	jz .L_done_memcpy_trap

	/* Copy trailing bytes */
	movl %edx, %ecx
.L_read_trailing_bytes:
	movb (%rsi), %al
	MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
	MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
.L_write_trailing_bytes:
	movb %al, (%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_read_trailing_bytes

	/* Copy successful. Return zero */
.L_done_memcpy_trap:
	xorl %eax, %eax
.L_done:
	ret
ENDPROC(__memcpy_mcsafe)
EXPORT_SYMBOL_GPL(__memcpy_mcsafe)

	.section .fixup, "ax"
	/*
	 * Return number of bytes not copied for any failure. Note that
	 * there is no "tail" handling since the source buffer is 8-byte
	 * aligned and poison is cacheline aligned.
	 */
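/*
 * How the fixup entries below reconstruct "bytes not copied", as a hedged
 * sketch. At the point of the fault, %ecx holds the current loop counter
 * and %edx holds the byte count still deferred to later stages:
 *
 *	remaining = lead_left + rest_after_leading;	// .E_leading_bytes
 *	remaining = words_left * 8 + tail_len;		// .E_read_words
 *	remaining = tail_left;				// .E_trailing_bytes
 *	return remaining;
 */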
.E_read_words:
	shll	$3, %ecx
.E_leading_bytes:
	addl	%edx, %ecx
.E_trailing_bytes:
	mov	%ecx, %eax
	jmp	.L_done

	/*
	 * For write fault handling, given the destination is unaligned,
	 * we handle faults on multi-byte writes with a byte-by-byte
	 * copy up to the write-protected page.
	 */
.E_write_words:
	shll	$3, %ecx
	addl	%edx, %ecx
	movl	%ecx, %edx
	jmp	mcsafe_handle_tail

	.previous

	_ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
	_ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
	_ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
	_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
	_ASM_EXTABLE(.L_write_words, .E_write_words)
	_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
#endif