linux/arch/s390/kernel/fpu.c
Heiko Carstens 8c09871a95 s390/fpu: limit save and restore to used registers
The first invocation of kernel_fpu_begin() after switching from user to
kernel context will save all vector registers, even if only parts of the
vector registers are used within the kernel fpu context. Given that save
and restore of all vector registers is quite expensive, change the current
approach in several ways:

- Instead of saving and restoring all user registers limit this to those
  registers which are actually used within a kernel fpu context.

- On context switch save all remaining user fpu registers, so they can be
  restored when the task is rescheduled.

- Saving user registers within kernel_fpu_begin() is done without disabling
  and enabling interrupts - which also slightly reduces runtime. In worst
  case (e.g. interrupt context uses the same registers) this may lead to
  the situation that registers are saved several times, however the
  assumption is that this will not happen frequently, so that the new
  method is faster in nearly all cases.

- save_user_fpu_regs() can still be called from all contexts and saves all
  (or all remaining) user registers to a task's ufpu user fpu save area.

Overall this reduces the time required to save and restore the user fpu
context for nearly all cases.

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
2024-02-16 14:30:16 +01:00

194 lines
4.2 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* In-kernel vector facility support functions
*
* Copyright IBM Corp. 2015
* Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
*/
#include <linux/kernel.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <asm/fpu.h>
/**
 * __kernel_fpu_begin - save FPU/vector registers of the previous context
 * @state: kernel FPU save area; @state->hdr.mask restricts what is saved,
 *	   @state->hdr.fpc receives the floating point control register and
 *	   @state->vxrs receives the register contents
 * @flags: KERNEL_FPC / KERNEL_VXR_* bits requested by the caller
 *
 * Register groups are written to @state->vxrs back to back: after a store
 * the write position is advanced by the value fpu_vstm() returns, so a
 * group does not necessarily end up at the index of its first register.
 * __kernel_fpu_end() reads the area back in the same order.
 */
void __kernel_fpu_begin(struct kernel_fpu *state, int flags)
{
	__vector128 *dst = state->vxrs;
	int all, low, high;

	/*
	 * Only registers already in use by the previous context
	 * have to be saved.
	 */
	flags &= state->hdr.mask;
	if (flags & KERNEL_FPC)
		fpu_stfpc(&state->hdr.fpc);

	/* No vector facility: fall back to save_fp_regs_vx(). */
	if (!cpu_has_vx()) {
		if (flags & KERNEL_VXR_LOW)
			save_fp_regs_vx(dst);
		return;
	}

	/* Fast paths: all registers, or the contiguous middle range. */
	all = flags & KERNEL_VXR;
	if (all == KERNEL_VXR) {
		dst += fpu_vstm(0, 15, dst);
		fpu_vstm(16, 31, dst);
		return;
	}
	if (all == KERNEL_VXR_MID) {
		fpu_vstm(8, 23, dst);
		return;
	}

	/* Lower half (V0-V15): both quarters, or just one of them. */
	low = flags & KERNEL_VXR_LOW;
	if (low == KERNEL_VXR_LOW)
		dst += fpu_vstm(0, 15, dst);
	else if (low == KERNEL_VXR_V0V7)
		dst += fpu_vstm(0, 7, dst);
	else if (low)
		dst += fpu_vstm(8, 15, dst);

	/* Upper half (V16-V31), stored right behind the lower part. */
	high = flags & KERNEL_VXR_HIGH;
	if (high == KERNEL_VXR_HIGH)
		fpu_vstm(16, 31, dst);
	else if (high == KERNEL_VXR_V16V23)
		fpu_vstm(16, 23, dst);
	else if (high)
		fpu_vstm(24, 31, dst);
}
EXPORT_SYMBOL(__kernel_fpu_begin);
/**
 * __kernel_fpu_end - restore FPU/vector registers of the previous context
 * @state: kernel FPU save area; @state->hdr.mask restricts what is
 *	   restored, @state->hdr.fpc holds the floating point control
 *	   register value and @state->vxrs the register contents
 * @flags: KERNEL_FPC / KERNEL_VXR_* bits of the context that is ending
 *
 * The save area is read back to back: after a load the read position is
 * advanced by the value fpu_vlm() returns. The order of the groups matches
 * the order in which __kernel_fpu_begin() stores them.
 */
void __kernel_fpu_end(struct kernel_fpu *state, int flags)
{
	__vector128 *src = state->vxrs;
	int all, low, high;

	/*
	 * Only registers of the previous context which have been
	 * overwritten by the current context have to be restored.
	 */
	flags &= state->hdr.mask;
	if (flags & KERNEL_FPC)
		fpu_lfpc(&state->hdr.fpc);

	/* No vector facility: fall back to load_fp_regs_vx(). */
	if (!cpu_has_vx()) {
		if (flags & KERNEL_VXR_LOW)
			load_fp_regs_vx(src);
		return;
	}

	/* Fast paths: all registers, or the contiguous middle range. */
	all = flags & KERNEL_VXR;
	if (all == KERNEL_VXR) {
		src += fpu_vlm(0, 15, src);
		fpu_vlm(16, 31, src);
		return;
	}
	if (all == KERNEL_VXR_MID) {
		fpu_vlm(8, 23, src);
		return;
	}

	/* Lower half (V0-V15): both quarters, or just one of them. */
	low = flags & KERNEL_VXR_LOW;
	if (low == KERNEL_VXR_LOW)
		src += fpu_vlm(0, 15, src);
	else if (low == KERNEL_VXR_V0V7)
		src += fpu_vlm(0, 7, src);
	else if (low)
		src += fpu_vlm(8, 15, src);

	/* Upper half (V16-V31), located right behind the lower part. */
	high = flags & KERNEL_VXR_HIGH;
	if (high == KERNEL_VXR_HIGH)
		fpu_vlm(16, 31, src);
	else if (high == KERNEL_VXR_V16V23)
		fpu_vlm(16, 23, src);
	else if (high)
		fpu_vlm(24, 31, src);
}
EXPORT_SYMBOL(__kernel_fpu_end);
/**
 * load_fpu_state - load FPU/vector registers from a save area
 * @state: save area providing the floating point control register value
 *	   (@state->fpc) and the vector register image (@state->vxrs)
 * @flags: KERNEL_FPC / KERNEL_VXR_* bits selecting what gets loaded
 *
 * In contrast to the packed layout used by __kernel_fpu_begin() and
 * __kernel_fpu_end(), every register group is read from a fixed index of
 * @state->vxrs: V0 from index 0, V8 from index 8, V16 from index 16 and
 * V24 from index 24.
 */
void load_fpu_state(struct fpu *state, int flags)
{
	__vector128 *vxrs = &state->vxrs[0];
	int mask;

	/*
	 * Use the exception-safe variant: per the kernel-doc of
	 * fpu_lfpc_safe() a saved FPC value may have been modified to an
	 * invalid value (ptrace, signal return, KVM), in which case a plain
	 * fpu_lfpc() would raise an unhandled exception.
	 */
	if (flags & KERNEL_FPC)
		fpu_lfpc_safe(&state->fpc);
	if (!cpu_has_vx()) {
		/*
		 * NOTE(review): save_fpu_state() tests KERNEL_VXR_LOW on its
		 * non-vector path while only KERNEL_VXR_V0V7 is tested here;
		 * confirm that the asymmetry is intended.
		 */
		if (flags & KERNEL_VXR_V0V7)
			load_fp_regs_vx(state->vxrs);
		return;
	}
	/* Fast paths: all registers, or the contiguous middle range. */
	mask = flags & KERNEL_VXR;
	if (mask == KERNEL_VXR) {
		fpu_vlm(0, 15, &vxrs[0]);
		fpu_vlm(16, 31, &vxrs[16]);
		return;
	}
	if (mask == KERNEL_VXR_MID) {
		fpu_vlm(8, 23, &vxrs[8]);
		return;
	}
	/* Lower half (V0-V15): both quarters, or just one of them. */
	mask = flags & KERNEL_VXR_LOW;
	if (mask) {
		if (mask == KERNEL_VXR_LOW)
			fpu_vlm(0, 15, &vxrs[0]);
		else if (mask == KERNEL_VXR_V0V7)
			fpu_vlm(0, 7, &vxrs[0]);
		else
			fpu_vlm(8, 15, &vxrs[8]);
	}
	/* Upper half (V16-V31): both quarters, or just one of them. */
	mask = flags & KERNEL_VXR_HIGH;
	if (mask) {
		if (mask == KERNEL_VXR_HIGH)
			fpu_vlm(16, 31, &vxrs[16]);
		else if (mask == KERNEL_VXR_V16V23)
			fpu_vlm(16, 23, &vxrs[16]);
		else
			fpu_vlm(24, 31, &vxrs[24]);
	}
}
/**
 * save_fpu_state - store FPU/vector registers into a save area
 * @state: save area receiving the floating point control register
 *	   (@state->fpc) and the vector register image (@state->vxrs)
 * @flags: KERNEL_FPC / KERNEL_VXR_* bits selecting what gets stored
 *
 * Every register group is written to a fixed index of @state->vxrs: V0 to
 * index 0, V8 to index 8, V16 to index 16 and V24 to index 24. This is the
 * layout load_fpu_state() reads from.
 */
void save_fpu_state(struct fpu *state, int flags)
{
	__vector128 *regs = state->vxrs;
	int all, low, high;

	if (flags & KERNEL_FPC)
		fpu_stfpc(&state->fpc);

	/* No vector facility: fall back to save_fp_regs_vx(). */
	if (!cpu_has_vx()) {
		if (flags & KERNEL_VXR_LOW)
			save_fp_regs_vx(regs);
		return;
	}

	/* Fast paths: all registers, or the contiguous middle range. */
	all = flags & KERNEL_VXR;
	if (all == KERNEL_VXR) {
		fpu_vstm(0, 15, regs);
		fpu_vstm(16, 31, regs + 16);
		return;
	}
	if (all == KERNEL_VXR_MID) {
		fpu_vstm(8, 23, regs + 8);
		return;
	}

	/* Lower half (V0-V15): both quarters, or just one of them. */
	low = flags & KERNEL_VXR_LOW;
	if (low == KERNEL_VXR_LOW)
		fpu_vstm(0, 15, regs);
	else if (low == KERNEL_VXR_V0V7)
		fpu_vstm(0, 7, regs);
	else if (low)
		fpu_vstm(8, 15, regs + 8);

	/* Upper half (V16-V31): both quarters, or just one of them. */
	high = flags & KERNEL_VXR_HIGH;
	if (high == KERNEL_VXR_HIGH)
		fpu_vstm(16, 31, regs + 16);
	else if (high == KERNEL_VXR_V16V23)
		fpu_vstm(16, 23, regs + 16);
	else if (high)
		fpu_vstm(24, 31, regs + 24);
}
EXPORT_SYMBOL(save_fpu_state);