The main part of this branch is the ongoing fight against windmills in
 an attempt to have userspace tools not poke at naked MSRs. This round
 deals with MSR_IA32_ENERGY_PERF_BIAS and removes direct poking into it
 by our in-tree tools in favor of the proper "energy_perf_bias" sysfs
 interface which we already have.
 
 In addition, the msr.ko write filtering's error message points to a new
 summary page which contains the info we collected from helpful reporters
 about which userspace tools write MSRs:
 
   https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/about
 
 along with the current status of their conversion.
 
 The rest is the usual small fixes and improvements.
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAl/XVKYACgkQEsHwGGHe
 VUondg//fv3aQM3KtWE7sxv6BjpiUNozPBELRuKo+EskHSxHudRhBxzdSMM7WgKq
 2uojb2CQtzRzYhHuiXjXKfbB7Ci/Jo4EDCJW2otpiqit7/UgXu15Q5ypCUMIteiV
 u9A2w3oN3GPR5TuofLWCffaotVMpFok3u7jX7RxEQPWmZqJItTwZpqYLeyniHaKM
 c6taAxZVyV13iejRhxim2zkl/hMXpjA8I+8CqWIL25J7GYlYeWLWxWYmHIQTs0NM
 zSIyr47RD8RRXVeRdeJMxnQblKE1zrObIV1fUXXu1dSW47DkrrcOQwEMorNjPtPA
 FR5Xhi+TX8JrBasMpwCnV/CTj6Ua8UsMfwQcPOFnXALPj87HfFSypa5BpnBH5xTW
 PaiatRmiNJm3g79ncaTvXCksMbb4WANqOYK+gsGYvtKbfLR+caWT6vytjZA6sC6x
 laynstV9PFUyewdwjjAjilhArzV+y+5RsRudBK8xSjcawbyV4ZEorNKYS9qrhm+y
 7CAM9A8fCQiO6POr6W7HcfmkUOHC9PLhtyjdJH89tAmaf+sfvaczzx3awwSuKx7P
 0rJlDiJP1v7yEpOMWHbpGIqjMBaWK4y3mb4g3UwFpHpo8cTl+WXZQppOPIBn9GA9
 ASLYT/ze7zk1Ua2V88qoXiC5AEvqBnSq4fp2pmf06ROZgBnYT6o=
 =ISyk
 -----END PGP SIGNATURE-----

Merge tag 'x86_misc_for_v5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull misc x86 updates from Borislav Petkov:
 "The main part of this branch is the ongoing fight against windmills in
  an attempt to have userspace tools not poke at naked MSRs.

  This round deals with MSR_IA32_ENERGY_PERF_BIAS and removes direct
  poking into it by our in-tree tools in favor of the proper
  "energy_perf_bias" sysfs interface which we already have.

  In addition, the msr.ko write filtering's error message points to a
  new summary page which contains the info we collected from helpful
  reporters about which userspace tools write MSRs:

      https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/about

  along with the current status of their conversion.

  The rest is the usual small fixes and improvements"

* tag 'x86_misc_for_v5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/msr: Add a pointer to an URL which contains further details
  x86/pci: Fix the function type for check_reserved_t
  selftests/x86: Add missing .note.GNU-stack sections
  selftests/x86/fsgsbase: Fix GS == 1, 2, and 3 tests
  x86/msr: Downgrade unrecognized MSR message
  x86/msr: Do not allow writes to MSR_IA32_ENERGY_PERF_BIAS
  tools/power/x86_energy_perf_policy: Read energy_perf_bias from sysfs
  tools/power/turbostat: Read energy_perf_bias from sysfs
  tools/power/cpupower: Read energy_perf_bias from sysfs
  MAINTAINERS: Cleanup SGI-related entries
This commit is contained in:
Linus Torvalds 2020-12-14 13:29:34 -08:00
commit 9c70f04678
15 changed files with 227 additions and 61 deletions

View file

@ -15905,13 +15905,14 @@ F: include/linux/sfp.h
K: phylink\.h|struct\s+phylink|\.phylink|>phylink_|phylink_(autoneg|clear|connect|create|destroy|disconnect|ethtool|helper|mac|mii|of|set|start|stop|test|validate)
SGI GRU DRIVER
M: Dimitri Sivanich <sivanich@sgi.com>
M: Dimitri Sivanich <dimitri.sivanich@hpe.com>
S: Maintained
F: drivers/misc/sgi-gru/
SGI XP/XPC/XPNET DRIVER
M: Cliff Whickman <cpw@sgi.com>
M: Robin Holt <robinmholt@gmail.com>
M: Steve Wahl <steve.wahl@hpe.com>
R: Mike Travis <mike.travis@hpe.com>
S: Maintained
F: drivers/misc/sgi-xp/
@ -19166,6 +19167,7 @@ F: arch/x86/platform
X86 PLATFORM UV HPE SUPERDOME FLEX
M: Steve Wahl <steve.wahl@hpe.com>
R: Mike Travis <mike.travis@hpe.com>
R: Dimitri Sivanich <dimitri.sivanich@hpe.com>
R: Russ Anderson <russ.anderson@hpe.com>
S: Supported

View file

@ -99,11 +99,9 @@ static int filter_write(u32 reg)
if (!__ratelimit(&fw_rs))
return 0;
if (reg == MSR_IA32_ENERGY_PERF_BIAS)
return 0;
pr_err("Write to unrecognized MSR 0x%x by %s (pid: %d). Please report to x86@kernel.org.\n",
reg, current->comm, current->pid);
pr_warn("Write to unrecognized MSR 0x%x by %s (pid: %d).\n",
reg, current->comm, current->pid);
pr_warn("See https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/about for details.\n");
return 0;
}

View file

@ -425,7 +425,7 @@ static acpi_status find_mboard_resource(acpi_handle handle, u32 lvl,
return AE_OK;
}
static bool is_acpi_reserved(u64 start, u64 end, unsigned not_used)
static bool is_acpi_reserved(u64 start, u64 end, enum e820_type not_used)
{
struct resource mcfg_res;
@ -442,7 +442,7 @@ static bool is_acpi_reserved(u64 start, u64 end, unsigned not_used)
return mcfg_res.flags;
}
typedef bool (*check_reserved_t)(u64 start, u64 end, unsigned type);
typedef bool (*check_reserved_t)(u64 start, u64 end, enum e820_type type);
static bool __ref is_mmconf_reserved(check_reserved_t is_reserved,
struct pci_mmcfg_region *cfg,

View file

@ -16,8 +16,8 @@
unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen)
{
int fd;
ssize_t numread;
int fd;
fd = open(path, O_RDONLY);
if (fd == -1)
@ -35,6 +35,27 @@ unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen)
return (unsigned int) numread;
}
/*
 * Write a string to a sysfs file.
 *
 * @path:   file to open for writing
 * @buf:    data to write
 * @buflen: size of @buf; buflen - 1 bytes are handed to write(2)
 *
 * Returns the number of bytes written, or 0 on any failure (open or
 * write). The return type is unsigned, so failure must not be reported
 * as -1: callers test the result against 0 and a wrapped -1 would look
 * like a very large successful write.
 */
unsigned int cpupower_write_sysfs(const char *path, char *buf, size_t buflen)
{
	ssize_t numwritten;
	int fd;

	/* buflen - 1 below would wrap around for an empty buffer */
	if (buflen == 0)
		return 0;

	fd = open(path, O_WRONLY);
	if (fd == -1)
		return 0;

	numwritten = write(fd, buf, buflen - 1);
	if (numwritten < 1) {
		perror(path);
		close(fd);
		return 0;
	}

	close(fd);

	return (unsigned int) numwritten;
}
/*
* Detect whether a CPU is online
*

View file

@ -1,6 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0 */
#define PATH_TO_CPU "/sys/devices/system/cpu/"
#ifndef MAX_LINE_LEN
#define MAX_LINE_LEN 4096
#endif
#define SYSFS_PATH_MAX 255
unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen);
unsigned int cpupower_write_sysfs(const char *path, char *buf, size_t buflen);

View file

@ -101,7 +101,7 @@ int cmd_info(int argc, char **argv)
}
if (params.perf_bias) {
ret = msr_intel_get_perf_bias(cpu);
ret = cpupower_intel_get_perf_bias(cpu);
if (ret < 0) {
fprintf(stderr,
_("Could not read perf-bias value[%d]\n"), ret);

View file

@ -95,7 +95,7 @@ int cmd_set(int argc, char **argv)
}
if (params.perf_bias) {
ret = msr_intel_set_perf_bias(cpu, perf_bias);
ret = cpupower_intel_set_perf_bias(cpu, perf_bias);
if (ret) {
fprintf(stderr, _("Error setting perf-bias "
"value on CPU %d\n"), cpu);

View file

@ -105,8 +105,8 @@ extern struct cpupower_cpu_info cpupower_cpu_info;
extern int read_msr(int cpu, unsigned int idx, unsigned long long *val);
extern int write_msr(int cpu, unsigned int idx, unsigned long long val);
extern int msr_intel_set_perf_bias(unsigned int cpu, unsigned int val);
extern int msr_intel_get_perf_bias(unsigned int cpu);
extern int cpupower_intel_set_perf_bias(unsigned int cpu, unsigned int val);
extern int cpupower_intel_get_perf_bias(unsigned int cpu);
extern unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu);
/* Read/Write msr ****************************/
@ -150,9 +150,9 @@ static inline int read_msr(int cpu, unsigned int idx, unsigned long long *val)
{ return -1; };
static inline int write_msr(int cpu, unsigned int idx, unsigned long long val)
{ return -1; };
static inline int msr_intel_set_perf_bias(unsigned int cpu, unsigned int val)
static inline int cpupower_intel_set_perf_bias(unsigned int cpu, unsigned int val)
{ return -1; };
static inline int msr_intel_get_perf_bias(unsigned int cpu)
static inline int cpupower_intel_get_perf_bias(unsigned int cpu)
{ return -1; };
static inline unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu)
{ return 0; };

View file

@ -1,7 +1,15 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#if defined(__i386__) || defined(__x86_64__)
#include "helpers/helpers.h"
#include "helpers/sysfs.h"
#include "cpupower_intern.h"
#define MSR_AMD_HWCR 0xc0010015
@ -40,4 +48,44 @@ int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active,
*support = *active = 1;
return 0;
}
/*
 * Read the energy/performance bias hint of @cpu from its
 * power/energy_perf_bias sysfs attribute.
 *
 * Returns the parsed value, or -1 if the CPU lacks
 * CPUPOWER_CAP_PERF_BIAS, the attribute cannot be read, or its content
 * is not a number.
 */
int cpupower_intel_get_perf_bias(unsigned int cpu)
{
	char linebuf[MAX_LINE_LEN];
	char path[SYSFS_PATH_MAX];
	char *endp;
	long val;

	if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_PERF_BIAS))
		return -1;

	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/power/energy_perf_bias", cpu);

	if (cpupower_read_sysfs(path, linebuf, MAX_LINE_LEN) == 0)
		return -1;

	/*
	 * strtol() only sets errno on failure, so clear it first; otherwise
	 * an ERANGE left over from an earlier call is mistaken for a
	 * conversion error here.
	 */
	errno = 0;
	val = strtol(linebuf, &endp, 0);
	if (endp == linebuf || errno == ERANGE)
		return -1;

	return (int) val;
}
/*
 * Set the energy/performance bias hint of @cpu to @val through its
 * power/energy_perf_bias sysfs attribute.
 *
 * Returns 0 on success, or -1 if the CPU lacks CPUPOWER_CAP_PERF_BIAS,
 * @val does not fit into the two-digit buffer, or the write fails.
 */
int cpupower_intel_set_perf_bias(unsigned int cpu, unsigned int val)
{
	char path[SYSFS_PATH_MAX];
	char linebuf[3] = {};
	unsigned int written;
	int len;

	if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_PERF_BIAS))
		return -1;

	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/power/energy_perf_bias", cpu);

	/*
	 * Refuse values that would be truncated: writing "10" when the
	 * caller asked for 100 would silently program the wrong bias.
	 */
	len = snprintf(linebuf, sizeof(linebuf), "%u", val);
	if (len < 0 || (size_t) len >= sizeof(linebuf))
		return -1;

	/*
	 * The helper returns an unsigned count, so a "<= 0" test can only
	 * ever match 0. Treat a wrapped -1 as failure as well.
	 */
	written = cpupower_write_sysfs(path, linebuf, sizeof(linebuf));
	if (written == 0 || written == (unsigned int) -1)
		return -1;

	return 0;
}
#endif /* #if defined(__i386__) || defined(__x86_64__) */

View file

@ -11,7 +11,6 @@
/* Intel specific MSRs */
#define MSR_IA32_PERF_STATUS 0x198
#define MSR_IA32_MISC_ENABLES 0x1a0
#define MSR_IA32_ENERGY_PERF_BIAS 0x1b0
#define MSR_NEHALEM_TURBO_RATIO_LIMIT 0x1ad
/*
@ -73,33 +72,6 @@ int write_msr(int cpu, unsigned int idx, unsigned long long val)
return -1;
}
/*
 * Fetch MSR_IA32_ENERGY_PERF_BIAS of @cpu via read_msr().
 *
 * Returns the register value converted to int, -1 when the CPU does not
 * advertise CPUPOWER_CAP_PERF_BIAS, or the non-zero result of read_msr().
 */
int msr_intel_get_perf_bias(unsigned int cpu)
{
	unsigned long long bias;
	int err;

	if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_PERF_BIAS))
		return -1;

	err = read_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &bias);
	if (err != 0)
		return err;

	return (int) bias;
}
/*
 * Store @val into MSR_IA32_ENERGY_PERF_BIAS of @cpu via write_msr().
 *
 * Returns 0 on success, -1 when the CPU does not advertise
 * CPUPOWER_CAP_PERF_BIAS, or the non-zero result of write_msr().
 */
int msr_intel_set_perf_bias(unsigned int cpu, unsigned int val)
{
	if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_PERF_BIAS))
		return -1;

	/* write_msr() yields 0 on success, so its result can be passed through */
	return write_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, val);
}
unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu)
{
unsigned long long val;

View file

@ -1831,6 +1831,25 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
return 0;
}
/*
 * Read the energy_perf_bias sysfs attribute of @cpu and return it as an
 * integer.
 *
 * If no integer can be parsed from the file, err(1, ...) is called with
 * the function name and path. Opening goes through fopen_or_die()
 * (presumably fatal on failure; defined elsewhere in this file).
 */
int get_epb(int cpu)
{
	char sysfs_path[128 + PATH_BYTES];
	int value = -1;
	FILE *stream;

	sprintf(sysfs_path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu);

	stream = fopen_or_die(sysfs_path, "r");

	if (fscanf(stream, "%d", &value) != 1)
		err(1, "%s(%s)", __func__, sysfs_path);

	fclose(stream);

	return value;
}
void get_apic_id(struct thread_data *t)
{
unsigned int eax, ebx, ecx, edx;
@ -3917,9 +3936,8 @@ dump_sysfs_pstate_config(void)
*/
int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
unsigned long long msr;
char *epb_string;
int cpu;
int cpu, epb;
if (!has_epb)
return 0;
@ -3935,10 +3953,11 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
return -1;
}
if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
epb = get_epb(cpu);
if (epb < 0)
return 0;
switch (msr & 0xF) {
switch (epb) {
case ENERGY_PERF_BIAS_PERFORMANCE:
epb_string = "performance";
break;
@ -3952,7 +3971,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
epb_string = "custom";
break;
}
fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
fprintf(outf, "cpu%d: EPB: %d (%s)\n", cpu, epb, epb_string);
return 0;
}

View file

@ -91,6 +91,9 @@ unsigned int has_hwp_request_pkg; /* IA32_HWP_REQUEST_PKG */
unsigned int bdx_highest_ratio;
#define PATH_TO_CPU "/sys/devices/system/cpu/"
#define SYSFS_PATH_MAX 255
/*
* maintain compatibility with original implementation, but don't document it:
*/
@ -721,6 +724,48 @@ int put_msr(int cpu, int offset, unsigned long long new_msr)
return 0;
}
/*
 * Read up to @buflen - 1 bytes from the file at @path into @buf and
 * NUL-terminate the result.
 *
 * Returns the number of bytes read, or 0 if the file cannot be opened
 * or nothing could be read.
 */
static unsigned int read_sysfs(const char *path, char *buf, size_t buflen)
{
	ssize_t got;
	int handle = open(path, O_RDONLY);

	if (handle == -1)
		return 0;

	got = read(handle, buf, buflen - 1);
	close(handle);

	if (got < 1)
		return 0;

	buf[got] = '\0';

	return (unsigned int) got;
}
/*
 * Write @buflen - 1 bytes from @buf to the file at @path.
 *
 * Returns the number of bytes written, or 0 on any failure (open or
 * write). The return type is unsigned, so failure is reported as 0
 * rather than -1, which would wrap to a huge "byte count".
 */
static unsigned int write_sysfs(const char *path, char *buf, size_t buflen)
{
	ssize_t numwritten;
	int fd;

	/* buflen - 1 below would wrap around for an empty buffer */
	if (buflen == 0)
		return 0;

	fd = open(path, O_WRONLY);
	if (fd == -1)
		return 0;

	numwritten = write(fd, buf, buflen - 1);
	if (numwritten < 1) {
		/*
		 * perror() appends ": <reason>\n" itself; a newline inside
		 * the prefix would split the diagnostic over two lines.
		 */
		perror(path);
		close(fd);
		return 0;
	}

	close(fd);

	return (unsigned int) numwritten;
}
void print_hwp_cap(int cpu, struct msr_hwp_cap *cap, char *str)
{
if (cpu != -1)
@ -798,17 +843,61 @@ void write_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int ms
put_msr(cpu, msr_offset, msr);
}
/*
 * Read the energy_perf_bias sysfs attribute of @cpu.
 *
 * Returns the parsed value, or -1 if has_epb is not set, the attribute
 * cannot be read, or its content is not a number.
 */
static int get_epb(int cpu)
{
	char path[SYSFS_PATH_MAX];
	char linebuf[3];
	char *endp;
	long val;

	if (!has_epb)
		return -1;

	/* cpu is a signed int, so format it with %d */
	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%d/power/energy_perf_bias", cpu);

	if (!read_sysfs(path, linebuf, sizeof(linebuf)))
		return -1;

	/*
	 * strtol() only sets errno on failure, so clear it first; otherwise
	 * a stale ERANGE from an earlier call is reported as a parse error.
	 */
	errno = 0;
	val = strtol(linebuf, &endp, 0);
	if (endp == linebuf || errno == ERANGE)
		return -1;

	return (int)val;
}
/*
 * Write @val to the energy_perf_bias sysfs attribute of @cpu.
 *
 * Returns the value written, or -1 if has_epb is not set, @val does not
 * fit into the two-character buffer, or the write fails.
 */
static int set_epb(int cpu, int val)
{
	char path[SYSFS_PATH_MAX];
	char linebuf[3];
	int len;
	int ret;

	if (!has_epb)
		return -1;

	/* cpu is a signed int, so format it with %d */
	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%d/power/energy_perf_bias", cpu);

	/*
	 * Refuse values that would be truncated: writing "10" when the
	 * caller asked for 100 would silently program the wrong bias.
	 */
	len = snprintf(linebuf, sizeof(linebuf), "%d", val);
	if (len < 0 || (size_t)len >= sizeof(linebuf))
		return -1;

	ret = write_sysfs(path, linebuf, sizeof(linebuf));
	if (ret <= 0)
		return -1;

	/*
	 * linebuf holds exactly the decimal form of val at this point, so
	 * there is no need to parse it back (and depend on errno) to know
	 * what was written.
	 */
	return val;
}
int print_cpu_msrs(int cpu)
{
unsigned long long msr;
struct msr_hwp_request req;
struct msr_hwp_cap cap;
int epb;
if (has_epb) {
get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
printf("cpu%d: EPB %u\n", cpu, (unsigned int) msr);
}
epb = get_epb(cpu);
if (epb >= 0)
printf("cpu%d: EPB %u\n", cpu, (unsigned int) epb);
if (!has_hwp)
return 0;
@ -1091,15 +1180,15 @@ int enable_hwp_on_cpu(int cpu)
int update_cpu_msrs(int cpu)
{
unsigned long long msr;
int epb;
if (update_epb) {
get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
put_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, new_epb);
epb = get_epb(cpu);
set_epb(cpu, new_epb);
if (verbose)
printf("cpu%d: ENERGY_PERF_BIAS old: %d new: %d\n",
cpu, (unsigned int) msr, (unsigned int) new_epb);
cpu, epb, (unsigned int) new_epb);
}
if (update_turbo) {

View file

@ -392,8 +392,8 @@ static void set_gs_and_switch_to(unsigned long local,
local = read_base(GS);
/*
* Signal delivery seems to mess up weird selectors. Put it
* back.
* Signal delivery is quite likely to change a selector
* of 1, 2, or 3 back to 0 due to IRET being defective.
*/
asm volatile ("mov %0, %%gs" : : "rm" (force_sel));
} else {
@ -411,6 +411,14 @@ static void set_gs_and_switch_to(unsigned long local,
if (base == local && sel_pre_sched == sel_post_sched) {
printf("[OK]\tGS/BASE remained 0x%hx/0x%lx\n",
sel_pre_sched, local);
} else if (base == local && sel_pre_sched >= 1 && sel_pre_sched <= 3 &&
sel_post_sched == 0) {
/*
* IRET is misdesigned and will squash selectors 1, 2, or 3
* to zero. Don't fail the test just because this happened.
*/
printf("[OK]\tGS/BASE changed from 0x%hx/0x%lx to 0x%hx/0x%lx because IRET is defective\n",
sel_pre_sched, local, sel_post_sched, base);
} else {
nerrs++;
printf("[FAIL]\tGS/BASE changed from 0x%hx/0x%lx to 0x%hx/0x%lx\n",

View file

@ -45,3 +45,5 @@ int80_and_ret:
.type int80_and_ret, @function
.size int80_and_ret, .-int80_and_ret
.section .note.GNU-stack,"",%progbits

View file

@ -57,3 +57,5 @@ call32_from_64:
ret
.size call32_from_64, .-call32_from_64
.section .note.GNU-stack,"",%progbits