/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>

#ifndef SELFTEST_CASE
/* For big-endian, 0 == most CPUs, 1 == POWER6, 2 == Cell */
#define SELFTEST_CASE	0
#endif

/*
 * memcpy - copy r5 bytes from the buffer at r4 to the buffer at r3.
 *
 * In:	r3 = destination pointer
 *	r4 = source pointer
 *	r5 = number of bytes to copy
 * Out:	r3 = the destination pointer as passed in
 *
 * Registers written: r0, r3-r12, ctr, cr0, cr1, cr6, cr7.
 * r1 is never modified.  The big-endian code parks the incoming r3 in
 * the doubleword at -STACKFRAMESIZE+STK_REG(R31)(r1) and reloads it
 * just before every return; the little-endian code works on a copy of
 * the destination in r9 and so leaves r3 alone.
 *
 * The first feature section picks between this file's generic code and
 * a branch to memcpy_power7, keyed on CPU_FTR_VMX_COPY (the exact patch
 * semantics live in <asm/feature-fixups.h>).
 *
 * Big-endian layout of the routine:
 *	entry		 - classify length / destination alignment
 *	.Ldst_aligned	 - 16 bytes per iteration, source also aligned
 *			   (or CPU copes with unaligned ld/std)
 *	.Lsrc_unaligned	 - aligned loads merged with shifts
 *	.Ldst_unaligned	 - copy 1-7 bytes to align the destination
 *	.Lshort_copy	 - lengths below 16
 *
 * Throughout the big-endian code cr7 holds the low four bits of a byte
 * count: cr7*4+0 <=> 8, cr7*4+1 <=> 4, cr7*4+2 <=> 2, cr7*4+3 <=> 1.
 */
	.align	7
_GLOBAL_TOC(memcpy)
BEGIN_FTR_SECTION
#ifdef __LITTLE_ENDIAN__
	cmpdi	cr7,r5,0		/* cr7 = (len == 0), tested by beqlr below */
#else
	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* save destination pointer for return value */
#endif
FTR_SECTION_ELSE
#ifdef CONFIG_PPC_BOOK3S_64
	b	memcpy_power7
#endif
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
#ifdef __LITTLE_ENDIAN__
	/* dumb little-endian memcpy that will get replaced at runtime */
	/* Pre-decrement both pointers so the update forms can use offset 1. */
	addi	r9,r3,-1		/* r9 = working dest; r3 stays intact */
	addi	r4,r4,-1
	beqlr	cr7			/* nothing to do for len == 0 */
	mtctr	r5			/* one loop iteration per byte */
1:	lbzu	r10,1(r4)
	stbu	r10,1(r9)
	bdnz	1b
	blr
#else
	PPC_MTOCRF(0x01,r5)		/* cr7 = low 4 bits of len */
	cmpldi	cr1,r5,16
	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
	andi.	r6,r6,7			/* cr0.eq <=> dest already 8-byte aligned */
	dcbt	0,r4			/* touch the first source cache block */
	blt	cr1,.Lshort_copy	/* len < 16 */
/* Below we want to nop out the bne if we're on a CPU that has the
   CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
   cleared.
   At the time of writing the only CPU that has this combination of bits
   set is Power6. */
test_feature = (SELFTEST_CASE == 1)
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
		    CPU_FTR_UNALIGNED_LD_STD)

/*
 * Destination is 8-byte aligned (or the CPU does not care).
 * In: r3 = dest, r4 = src, r5 = len, cr7 = low bits of len,
 *     cr1 = len compared (unsigned) with 16.
 */
.Ldst_aligned:
	addi	r3,r3,-16		/* bias dest for the 8/16 displacements below */
test_feature = (SELFTEST_CASE == 0)
BEGIN_FTR_SECTION
	andi.	r0,r4,7			/* r0 = src offset within its doubleword */
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	srdi	r7,r5,4			/* r7 = number of 16-byte chunks */
	ld	r9,0(r4)		/* first doubleword; loads run ahead of stores */
	addi	r4,r4,-8
	mtctr	r7
	andi.	r5,r5,7			/* r5 = tail bytes (< 8); cr0.eq <=> none */
	bf	cr7*4+0,2f		/* no odd doubleword: enter the loop midway */
	/* len has the 8 bit set: one doubleword is handled outside the pairs */
	addi	r3,r3,8
	addi	r4,r4,8
	mr	r8,r9
	blt	cr1,3f			/* fewer than 16 bytes left: just that one store */
1:	ld	r9,8(r4)
	std	r8,8(r3)
2:	ldu	r8,16(r4)
	stdu	r9,16(r3)
	bdnz	1b
3:	std	r8,8(r3)
	beq	3f			/* cr0 from the andi. above: no tail, return */
	addi	r3,r3,16		/* r3 = next byte to write */
	/*
	 * Copy the final 0-7 bytes, 4/2/1 at a time as selected by cr7.
	 * r4 trails the next unread source byte by 8, hence the 8(r4) loads.
	 */
.Ldo_tail:
	bf	cr7*4+1,1f
	lwz	r9,8(r4)
	addi	r4,r4,4
	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	lhz	r9,8(r4)
	addi	r4,r4,2
	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	lbz	r9,8(r4)
	stb	r9,0(r3)
3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr

/*
 * Destination aligned, source not.  The source pointer is rounded down to
 * a doubleword boundary and every output doubleword is assembled from two
 * neighbouring aligned loads: (s[n] << r10) | (s[n+1] >> r11).
 * In: r0 = src & 7 (non-zero), r3 = dest - 16, r4 = src, r5 = len.
 */
.Lsrc_unaligned:
	srdi	r6,r5,3			/* r6 = len / 8, compared with 3 below */
	addi	r5,r5,-16
	subf	r4,r0,r4		/* r4 = src rounded down to 8 bytes */
	srdi	r7,r5,4			/* loop count = (len - 16) / 16 */
	sldi	r10,r0,3		/* r10 = left shift in bits = 8 * offset */
	cmpdi	cr6,r6,3		/* cr6 picks how much of the pipeline runs */
	andi.	r5,r5,7			/* tail bytes; cr0.eq <=> none (used at 5: below) */
	mtctr	r7
	subfic	r11,r10,64		/* r11 = complementary right shift */
	add	r5,r5,r0		/* r5 = tail bytes + source offset */

	bt	cr7*4+0,0f		/* 8 bit of len set: take the odd-count entry */

	ld	r9,0(r4)	# 3+2n loads, 2+2n stores
	ld	r0,8(r4)
	sld	r6,r9,r10
	ldu	r9,16(r4)
	srd	r7,r0,r11
	sld	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,4f
	ld	r0,8(r4)
	# s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
	b	2f

0:	ld	r0,0(r4)	# 4+2n loads, 3+2n stores
	ldu	r9,8(r4)
	sld	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f
	ld	r0,8(r4)
	srd	r12,r9,r11
	sld	r6,r9,r10
	ldu	r9,16(r4)
	or	r12,r8,r12
	srd	r7,r0,r11
	sld	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,3f

	# d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
1:	or	r7,r7,r6
	ld	r0,8(r4)
	std	r12,8(r3)
2:	srd	r12,r9,r11
	sld	r6,r9,r10
	ldu	r9,16(r4)
	or	r12,r8,r12
	stdu	r7,16(r3)
	srd	r7,r0,r11
	sld	r8,r0,r10
	bdnz	1b

	/* Drain the doublewords still held in registers. */
3:	std	r12,8(r3)
	or	r7,r7,r6
4:	std	r7,16(r3)
5:	srd	r12,r9,r11
	or	r12,r8,r12
	std	r12,24(r3)
	/*
	 * No tail bytes: done.  The nearest following "4:" is the return
	 * sequence at the end of .Lshort_copy.
	 */
	beq	4f
	cmpwi	cr1,r5,8
	addi	r3,r3,32		/* r3 = next byte to write */
	sld	r9,r9,r10		/* left-justify the unused bytes of the last load */
	ble	cr1,6f			/* tail + offset <= 8: r9 already holds the tail */
	ld	r0,8(r4)		/* otherwise the tail spills into the next doubleword */
	srd	r7,r0,r11
	or	r9,r7,r9
6:
	/* Rotate each piece into the low end of r9 and store it, per cr7. */
	bf	cr7*4+1,1f
	rotldi	r9,r9,32
	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	rotldi	r9,r9,16
	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	rotldi	r9,r9,8
	stb	r9,0(r3)
3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr

/*
 * Destination not 8-byte aligned.  Copy r6 (1-7) bytes as 1 + 2 + 4 so
 * the destination becomes aligned, then rejoin .Ldst_aligned with the
 * length, cr1 and cr7 recomputed for what is left.
 */
.Ldst_unaligned:
	PPC_MTOCRF(0x01,r6)		# put #bytes to 8B bdry into cr7
	subf	r5,r6,r5		/* r5 = bytes left after alignment */
	li	r7,0			/* r7 = running offset into both buffers */
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
	lbz	r0,0(r4)
	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
	lhzx	r0,r7,r4
	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
	lwzx	r0,r7,r4
	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)		/* cr7 = low 4 bits of the remaining length */
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

/*
 * len < 16: copy 8 (as two words), 4, 2 and 1 bytes according to the
 * bits of len held in cr7.
 */
.Lshort_copy:
	bf	cr7*4+0,1f
	lwz	r0,0(r4)
	lwz	r9,4(r4)
	addi	r4,r4,8
	stw	r0,0(r3)
	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
	lbz	r0,0(r4)
	stb	r0,0(r3)
4:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr
#endif
EXPORT_SYMBOL(memcpy)