blob: 0051a84f60c0553bad763d53b1d86f331e6da491 [file] [log] [blame]
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
 */
5
6#include <linux/linkage.h>
7
/*
 * Endian-dependent helpers for the unaligned-source paths of memcpy.
 *
 * In those paths a few source bytes are read ahead and carried in a
 * register; every aligned source word is then split in two parts:
 *
 *   SHIFT_1(RX, RY, IMM)   RX = part of RY that completes the carried bytes
 *   SHIFT_2(RX, RY, IMM)   RX = part of RY that becomes the new carry
 *   MERGE_1 / MERGE_2      position the first halfword / byte of the
 *                          off-by-1 case so they can be OR-ed together
 *   EXTRACT_1 / EXTRACT_2  pull the pending 16 bits / 8 bits out of the
 *                          carry once the word loop is done
 *
 * Only the shift direction (asl vs lsr) differs between the two byte
 * orders.  Details worth knowing when reading the call sites:
 *   - little endian: MERGE_2 expands to nothing, and EXTRACT_1 ignores
 *     IMM (it masks with 0xFFFF instead of shifting);
 *   - big endian: EXTRACT_2 ignores IMM and always shifts by 8.
 */
#ifdef __LITTLE_ENDIAN__
# define SHIFT_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define SHIFT_2(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM
# define MERGE_2(RX,RY,IMM)
# define EXTRACT_1(RX,RY,IMM)	and	RX, RY, 0xFFFF
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, IMM
#else
# define SHIFT_1(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define SHIFT_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define EXTRACT_1(RX,RY,IMM)	lsr	RX, RY, IMM
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, 0x08
#endif
23
/*
 * Load/store primitives for the fully aligned copy loop of memcpy.
 *
 *   LOADX(DST, RX)    load into DST from [RX], then advance RX
 *   STOREX(SRC, RX)   store SRC to [RX], then advance RX
 *   ZOLSHFT           log2 of the bytes moved by one loop iteration
 *   ZOLAND            mask selecting the bytes left over after that loop
 *
 * The loop issues four LOADX and four STOREX per iteration, so:
 *   - with CONFIG_ARC_HAS_LL64 (ldd/std, 8 bytes each): 32 bytes per
 *     iteration, hence shift 5 and mask 0x1F;
 *   - otherwise (ld/st, 4 bytes each): 16 bytes per iteration, hence
 *     shift 4 and mask 0xF.
 */
#ifdef CONFIG_ARC_HAS_LL64
# define LOADX(DST,RX)		ldd.ab	DST, [RX, 8]
# define STOREX(SRC,RX)		std.ab	SRC, [RX, 8]
# define ZOLSHFT		5
# define ZOLAND			0x1F
#else
# define LOADX(DST,RX)		ld.ab	DST, [RX, 4]
# define STOREX(SRC,RX)		st.ab	SRC, [RX, 4]
# define ZOLSHFT		4
# define ZOLAND			0xF
#endif
35
/*
 * memcpy: copy r2 bytes from [r1] to [r0].
 *
 * In:    r0 = destination, r1 = source, r2 = length in bytes
 * Out:   r0 = destination (r0 is never written; r3 is the moving
 *        destination pointer)
 * Uses:  r1-r10, lp_count and the flags.  NOTE(review): with
 *        CONFIG_ARC_HAS_LL64 the ldd/std forms presumably operate on
 *        register pairs, which would also touch r11 - confirm against
 *        the ISA manual.
 *
 * Strategy:
 *   1. length == 0: return at once.
 *   2. length <= 8: plain byte loop.
 *   3. otherwise copy single bytes until the destination is 4-byte
 *      aligned, then dispatch on the source alignment (src & 3):
 *        CASE 0  aligned      4x unrolled LOADX/STOREX loop + byte tail
 *        CASE 1  off by 1     read 3 bytes ahead, 8 bytes per iteration
 *        CASE 2  off by 2     read 2 bytes ahead, 8 bytes per iteration
 *        CASE 3  off by 3     read 1 byte  ahead, 8 bytes per iteration
 *      In cases 1-3 the bytes read ahead are carried in r5, merged with
 *      each aligned source word by shifting, and flushed after the loop.
 *
 * All loops are zero-overhead loops (lpnz): they are skipped when the
 * Z flag is set by the instruction that computed lp_count.  Instructions
 * following a ".d" branch or jump execute in its delay slot.
 */
ENTRY_CFI(memcpy)
	mov.f	0, r2			; set flags from the length only
;;; if size is zero
	jz.d	[blink]
	mov	r3, r0			; delay slot: work on r3, do not clobber ret val

;;; if size <= 8
	cmp	r2, 8
	bls.d	@.Lsmallchunk
	mov.f	lp_count, r2		; delay slot: byte count and flags for .Lsmallchunk

;;; Align the destination: copy 4 - (dst & 3) single bytes
	and.f	r4, r0, 0x03		; Z set when dst is already aligned
	rsub	lp_count, r4, 4		; lp_count = 4 - r4 (unused when Z is set)
	lpnz	@.Laligndestination
	;; LOOP BEGIN
	ldb.ab	r5, [r1,1]
	sub	r2, r2, 1		; r2 keeps tracking the bytes still to copy
	stb.ab	r5, [r3,1]
.Laligndestination:

;;; Check the alignment of the source
	and.f	r4, r1, 0x03		; r4 = src & 3, used again at .Lsourceunaligned
	bnz.d	@.Lsourceunaligned

;;; CASE 0: Both source and destination are 32bit aligned
;;; Convert len to loop iterations, unfold x4
	lsr.f	lp_count, r2, ZOLSHFT	; also the delay slot of the bnz.d above
	lpnz	@.Lcopy32_64bytes
	;; LOOP START
	LOADX (r6, r1)
	LOADX (r8, r1)
	LOADX (r10, r1)
	LOADX (r4, r1)
	STOREX (r6, r3)
	STOREX (r8, r3)
	STOREX (r10, r3)
	STOREX (r4, r3)
.Lcopy32_64bytes:

	and.f	lp_count, r2, ZOLAND	; tail: at most 31 bytes with LL64, 15 without
.Lsmallchunk:
	lpnz	@.Lcopyremainingbytes
	;; LOOP START
	ldb.ab	r5, [r1,1]
	stb.ab	r5, [r3,1]
.Lcopyremainingbytes:

	j	[blink]
;;; END CASE 0

.Lsourceunaligned:
	cmp	r4, 2
	beq.d	@.LunalignedOffby2
	sub	r2, r2, 1		; delay slot, all three cases: one byte accounted for

	bhi.d	@.LunalignedOffby3	; still testing the flags of cmp r4, 2
	ldb.ab	r5, [r1, 1]		; delay slot, off by 1 and off by 3: read that byte

;;; CASE 1: The source is unaligned, off by 1
	;; One byte was read above to reach 16bit alignment,
	;; two more bytes are read here to reach 32bit alignment
	ldh.ab	r6, [r1, 2]
	sub	r2, r2, 2		; three bytes read ahead in total
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3		; 8 bytes per iteration
	MERGE_1 (r6, r6, 8)
	MERGE_2 (r5, r5, 24)
	or	r5, r5, r6		; r5 = the three carried bytes

	;; Both src and dst are aligned
	lpnz	@.Lcopy8bytes_1
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 24)
	or	r7, r7, r5		; carried bytes + first byte of r6
	SHIFT_2	(r5, r6, 8)		; rest of r6 becomes the carry

	SHIFT_1	(r9, r8, 24)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 8)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_1:

	;; Write back the remaining 16bits
	EXTRACT_1 (r6, r5, 16)
	sth.ab	r6, [r3, 2]
	;; Write back the remaining 8bits
	EXTRACT_2 (r5, r5, 16)
	stb.ab	r5, [r3, 1]

	and.f	lp_count, r2, 0x07	; tail: at most 7 bytes
	lpnz	@.Lcopybytewise_1
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_1:
	j	[blink]

.LunalignedOffby2:
;;; CASE 2: The source is unaligned, off by 2
	ldh.ab	r5, [r1, 2]		; two bytes read ahead, carried in r5
	sub	r2, r2, 1		; second of the two bytes (first one counted above)

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3		; 8 bytes per iteration
#ifdef __BIG_ENDIAN__
	asl.nz	r5, r5, 16		; only when the loop runs, undone after it
#endif
	lpnz	@.Lcopy8bytes_2
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 16)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 16)

	SHIFT_1	(r9, r8, 16)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 16)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_2:

#ifdef __BIG_ENDIAN__
	lsr.nz	r5, r5, 16		; flags are still those of the lsr.f above
#endif
	sth.ab	r5, [r3, 2]		; flush the two carried bytes

	and.f	lp_count, r2, 0x07	; tail: at most 7 bytes
	lpnz	@.Lcopybytewise_2
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_2:
	j	[blink]

.LunalignedOffby3:
;;; CASE 3: The source is unaligned, off by 3
;;; The single byte needed to reach 32bit alignment was already read
;;; into r5 (and counted in r2) by the dispatch code above

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3		; 8 bytes per iteration
#ifdef __BIG_ENDIAN__
	asl.nz	r5, r5, 24		; only when the loop runs, undone after it
#endif
	lpnz	@.Lcopy8bytes_3
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 8)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 24)

	SHIFT_1	(r9, r8, 8)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 24)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_3:

#ifdef __BIG_ENDIAN__
	lsr.nz	r5, r5, 24		; flags are still those of the lsr.f above
#endif
	stb.ab	r5, [r3, 1]		; flush the carried byte

	and.f	lp_count, r2, 0x07	; tail: at most 7 bytes
	lpnz	@.Lcopybytewise_3
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_3:
	j	[blink]

END_CFI(memcpy)