/* SPDX-License-Identifier: GPL-2.0 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/export.h>
#include <asm/asm-compat.h>

/*
 * Load state from memory into VMX registers including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(load_vr_state)
	li	r4,VRSTATE_VSCR
	lvx	v0,r4,r3
	mtvscr	v0
	REST_32VRS(0,r4,r3)
	blr
EXPORT_SYMBOL(load_vr_state)

/*
 * Store VMX state into memory, including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(store_vr_state)
	SAVE_32VRS(0, r4, r3)
	mfvscr	v0
	li	r4, VRSTATE_VSCR
	stvx	v0, r4, r3
	blr
EXPORT_SYMBOL(store_vr_state)
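
/*
 * For reference, a rough C view of the memory layout the two routines
 * above assume (a sketch only; the real definition lives in the
 * processor headers, and VRSTATE_VSCR is generated into asm-offsets.h):
 *
 *	struct thread_vr_state {
 *		vector128 vr[32];	// 32 x 128-bit vector registers
 *		vector128 vscr;		// VSCR kept in its own 128-bit slot
 *	};
 *
 * load_vr_state(&vr_state) fills v0-v31 and VSCR from such a block;
 * store_vr_state(&vr_state) writes them back out.
 */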

/*
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (i.e., no lazy save of the vector registers).
 *
 * Note that on 32-bit this can only use registers that will be
 * restored by fast_exception_return, i.e. r3-r6, r10 and r11.
 */
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

	/*
	 * While userspace in general ignores VRSAVE, glibc uses it as a boolean
	 * to optimise userspace context save/restore. Whenever we take an
	 * AltiVec unavailable exception we must set VRSAVE to something
	 * non-zero. Set it to all 1s. See also the programming note in the ISA.
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	mfspr	r5,SPRN_SPRG_THREAD	/* current task's THREAD (phys) */
	oris	r9,r9,MSR_VEC@h
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#endif
	/* Don't care if r4 overflows, this is desired behaviour */
	lbz	r4,THREAD_LOAD_VEC(r5)
	addi	r4,r4,1
	stb	r4,THREAD_LOAD_VEC(r5)
	addi	r6,r5,THREAD_VRSTATE
	li	r4,1
	li	r10,VRSTATE_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	v0,r10,r6
	mtvscr	v0
	REST_32VRS(0,r4,r6)
	/* restore registers and return */
	blr
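
/*
 * Userspace-side illustration of why VRSAVE must end up non-zero here.
 * This is a sketch of the glibc behaviour described in the comment
 * above, not kernel code; the helper names are hypothetical:
 *
 *	void save_user_context(ucontext_t *uc)
 *	{
 *		// glibc treats VRSAVE as a boolean: if it reads as 0 it
 *		// assumes the thread never used the vector unit and skips
 *		// saving/restoring v0-v31 entirely.
 *		if (read_vrsave() != 0)
 *			save_vector_regs(uc);
 *	}
 */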

/*
 * save_altivec(tsk)
 * Save the task's vector registers to its thread_struct.
 */
_GLOBAL(save_altivec)
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r7,0
	bne	2f
	addi	r7,r3,THREAD_VRSTATE
2:	SAVE_32VRS(0,r4,r7)
	mfvscr	v0
	li	r4,VRSTATE_VSCR
	stvx	v0,r4,r7
	blr
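
/*
 * Destination selection above in rough C (a sketch; the field names
 * are the C counterparts of the asm-offsets symbols used here):
 *
 *	void save_altivec(struct task_struct *tsk)
 *	{
 *		// Save to the alternate save area if one is set up
 *		// (THREAD_VRSAVEAREA), otherwise into thread.vr_state.
 *		struct thread_vr_state *dst = tsk->thread.vr_save_area;
 *
 *		if (!dst)
 *			dst = &tsk->thread.vr_state;
 *		store_vr_state(dst);	// 32 VRs plus VSCR, as above
 *	}
 */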

#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the FP and VMX loads, but first check to see if they have
 * been done already.
 */
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been done yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */

	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4)	/* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
	b	fast_exception_return
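
/*
 * Control flow of load_up_vsx in rough C (a sketch; the MSR bits are
 * taken from the interrupted context's MSR held in r12):
 *
 *	if (!(msr & MSR_FP))
 *		load_up_fpu();		// bring the FP state in first
 *	if (!(msr & MSR_VEC))
 *		load_up_altivec();	// then the VMX state
 *	current->thread.used_vsr = 1;
 *	regs->msr |= MSR_VSX;		// let the task use VSX on return
 */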

#endif /* CONFIG_VSX */


/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers. These routines must be called
 * with preempt disabled.
 */
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)
#endif

	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr

fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr
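
/*
 * How the emulation routines below use fpenable/fpdisable, roughly
 * (a C-like sketch of the pattern they all follow, not a normal C ABI):
 *
 *	save_lr_in_r12();	// each routine starts with mflr r12
 *	fpenable();		// 64-byte frame: fr31 at 8(r1), fr1 at 16(r1),
 *				// fr0 at 24(r1); old MSR in r10, old FPSCR in
 *				// fr31; FPSCR cleared to 0
 *	...per-element FP work; extra FPRs may be stashed at 32..56(r1)...
 *	fpdisable();		// restores FPSCR, FPRs and MSR, pops the frame
 *				// and returns through the address in r12
 */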

/*
 * Vector add, floating point.
 */
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable
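
/*
 * What vaddfp computes, in rough C (a sketch; vsubfp, vmaddfp and
 * vnmsubfp below follow the same four-element loop with a different
 * per-element operation):
 *
 *	void vaddfp(float *d, const float *a, const float *b)
 *	{
 *		for (int i = 0; i < 4; i++)
 *			d[i] = a[i] + b[i];	// one fadds per element
 *	}
 */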

/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 */
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate. We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0			/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0		/* r * s */
	fmuls	fr2,fr1,fr5		/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4		/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1		/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0		/* r * s */
	fmuls	fr2,fr1,fr5		/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4		/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1		/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable
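
/*
 * The Newton-Raphson refinement above in rough C (a sketch; frsqrte()
 * stands in for the hardware estimate instruction):
 *
 *	void vrsqrtefp(float *d, const float *s)
 *	{
 *		for (int i = 0; i < 4; i++) {
 *			float x = s[i];
 *			float r = frsqrte(x);	// initial estimate of 1/sqrt(x)
 *
 *			// Each step computes r' = r + 0.5 * r * (1 - x * r * r),
 *			// roughly doubling the number of accurate bits.
 *			r = r + 0.5f * r * (1.0f - x * r * r);
 *			r = r + 0.5f * r * (1.0f - x * r * r);
 *			d[i] = r;
 *		}
 *	}
 */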