Merge pull request #4866 from lengau/nvidia

Make Nvidia info not depend on /proc access
This commit is contained in:
Daniel Johnson 2023-05-31 15:10:42 -04:00 committed by GitHub
commit a245387b03
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 424 additions and 41 deletions

View file

@ -4,81 +4,186 @@ Everything in this module should rely on /proc or /sys only, no executable calls
"""
import os
import re
from typing import Dict, Iterable, List
from lutris.util.graphics.glxinfo import GlxInfo
from lutris.util.log import logger
from lutris.util.system import read_process_output
# Minimum recommended NVIDIA driver version (major number).
# NOTE(review): presumably compared against the detected driver version in
# is_outdated() -- confirm against the full function body.
MIN_RECOMMENDED_NVIDIA_DRIVER = 415
def _parse_nvrm_version(content: List[str]) -> Dict[str, Dict[str, str]]:
    """Parse the first line of /proc/driver/nvidia/version.

    Returns the ``{"nvrm": {...}}`` mapping, or an empty dict when the file
    is empty or the line does not have the expected layout.
    """
    if not content:
        return {}
    parts = content[0].split(": ")
    if len(parts) < 2:
        return {}
    fields = parts[1].strip().split()
    try:
        if "Open" in fields:
            # Open kernel module layout:
            # NVIDIA UNIX Open Kernel Module for <arch> <version> ...
            return {
                "nvrm": {
                    "vendor": fields[0],
                    "platform": fields[1],
                    "arch": fields[6],
                    "version": fields[7],
                }
            }
        # Proprietary module layout:
        # NVIDIA UNIX <arch> Kernel Module <version> <build date...>
        return {
            "nvrm": {
                "vendor": fields[0],
                "platform": fields[1],
                "arch": fields[2],
                "version": fields[5],
                "date": " ".join(fields[6:]),
            }
        }
    except IndexError:
        return {}


def get_nvidia_driver_info() -> Dict[str, Dict[str, str]]:
    """Return information about NVidia drivers

    The data is read from /proc/driver/nvidia/version when that file is
    readable. If the file cannot be read or parsed, the information is
    rebuilt from glxinfo and ``uname`` instead.

    Returns:
        A dict with a single "nvrm" key holding "vendor", "platform", "arch"
        and "version" (plus "date" for the proprietary module when read from
        /proc), or an empty dict if the version file does not exist or no
        NVIDIA driver could be identified.
    """
    version_file_path = "/proc/driver/nvidia/version"
    try:
        if not os.path.exists(version_file_path):
            return {}
        with open(version_file_path, encoding="utf-8") as version_file:
            content = version_file.readlines()
    except PermissionError:
        # MAC systems (selinux, apparmor) may block access to files in /proc.
        # If this happens, we may still be able to retrieve the info by
        # other means, but need additional validation.
        logger.info("Could not access %s. Falling back to glxinfo.", version_file_path)
    except OSError as e:
        logger.warning(
            "Unexpected error when accessing %s. Falling back to glxinfo.",
            version_file_path,
            exc_info=e,
        )
    else:
        driver_info = _parse_nvrm_version(content)
        if driver_info:
            return driver_info
        # An empty or unrecognised file should not crash the caller.
        logger.warning(
            "Could not parse %s. Falling back to glxinfo.", version_file_path
        )

    glx_info = GlxInfo()
    # GlxInfo attributes may be missing when glxinfo produced no usable
    # output, so read them defensively instead of risking AttributeError.
    vendor = getattr(glx_info, "opengl_vendor", None) or ""
    if "nvidia" not in vendor.lower():
        logger.error("Expected NVIDIA vendor information, received %s.", vendor)
        return {}
    platform = read_process_output(["uname", "-s"])
    arch = read_process_output(["uname", "-m"])
    # The driver version is the last word of the OpenGL version string,
    # e.g. "4.6.0 NVIDIA 525.105.17".
    version_words = (getattr(glx_info, "opengl_version", None) or "").rsplit(maxsplit=1)
    return {
        "nvrm": {
            "vendor": vendor,
            "platform": platform,
            "arch": arch,
            "version": version_words[-1] if version_words else "",
        }
    }
def get_nvidia_gpu_ids() -> List[str]:
    """Return the list of Nvidia GPUs

    The IDs are the entry names of /proc/driver/nvidia/gpus. When that
    directory cannot be listed, the PCI addresses reported by ``lspci`` for
    NVIDIA display controllers are returned instead.
    """
    gpus_dir = "/proc/driver/nvidia/gpus"
    try:
        return os.listdir(gpus_dir)
    except PermissionError:
        logger.info("Permission denied to %s. Using lspci instead.", gpus_dir)
    except OSError as e:
        logger.warning(
            "Unexpected error accessing %s. Using lspci instead.", gpus_dir, exc_info=e
        )
    # 10de is NVIDIA's vendor ID. All of the vendor's devices are listed and
    # filtered here so that every display controller class (03xx) is kept:
    # 0300 is a VGA controller, 0302 a 3D controller (common for secondary
    # GPUs in hybrid-graphics laptops).
    lspci_output = read_process_output(["lspci", "-D", "-n", "-d", "10de:"])
    gpu_ids = []
    for line in lspci_output.splitlines():
        # Expected shape: "<domain:bus:dev.fn> <class>: <vendor>:<device> ..."
        fields = line.split()
        if len(fields) >= 2 and fields[1].startswith("03"):
            gpu_ids.append(fields[0])
    return gpu_ids
def get_nvidia_gpu_info(gpu_id: str) -> Dict[str, str]:
    """Return details about a GPU

    Args:
        gpu_id: GPU identifier as returned by get_nvidia_gpu_ids() (a PCI
            address such as "0000:01:00.0").

    Returns:
        The "key: value" pairs of /proc/driver/nvidia/gpus/<gpu_id>/information
        when that file is readable. Otherwise a dict built from ``lspci -v``
        output containing "Model", "IRQ" and "Bus Location" plus every
        "key: value" line lspci printed.
    """
    gpu_info_file = f"/proc/driver/nvidia/gpus/{gpu_id}/information"
    try:
        with open(gpu_info_file, encoding="utf-8") as info_file:
            content = info_file.readlines()
    except PermissionError:
        logger.info("Permission denied to %s. Detecting with lspci.", gpu_info_file)
    except OSError as e:
        logger.warning(
            "Unexpected error accessing %s. Detecting with lspci",
            gpu_info_file,
            exc_info=e,
        )
    else:
        info = {}
        for line in content:
            # Blank or otherwise colon-less lines carry no key/value pair;
            # unpacking them would raise ValueError.
            if ":" not in line:
                continue
            key, value = line.split(":", 1)
            info[key] = value.strip()
        return info

    lspci_data = read_process_output(["lspci", "-v", "-s", gpu_id])
    model_info = re.search(r"NVIDIA Corporation \w+ \[(.+?)\]", lspci_data)
    if model_info:
        model = model_info.group(1)
    else:
        logger.error("Could not detect NVIDIA GPU model.")
        model = "Unknown"
    irq_info = re.search(r"IRQ ([0-9]+)", lspci_data)
    if irq_info:
        irq = irq_info.group(1)
    else:
        logger.error("Could not detect GPU IRQ information.")
        # NOTE(review): None does not match the Dict[str, str] annotation;
        # kept as-is because callers may test for it.
        irq = None
    info = {
        "Model": f"NVIDIA {model}",
        "IRQ": irq,
        "Bus Location": gpu_id,
    }
    for line in lspci_data.splitlines():
        if ":" not in line:
            continue
        key, value = line.split(":", 1)
        info[key.strip()] = value.strip()
    return info
def is_nvidia() -> bool:
    """Return true if the Nvidia drivers are currently in use.

    Detection is attempted in this order, moving on only when a step raises
    OSError: existence of /proc/driver/nvidia, an "nvidia" entry in
    /proc/modules, and finally the OpenGL vendor string from glxinfo.

    Note: This function may not detect use of the nouveau drivers.
    """
    try:
        # NOTE(review): os.path.exists() normally reports an inaccessible
        # path as False instead of raising, so this fallback may rarely
        # trigger outside of tests -- confirm whether a stat-based check
        # is wanted here.
        return os.path.exists("/proc/driver/nvidia")
    except OSError:
        logger.info(
            "Could not determine whether /proc/driver/nvidia exists. "
            "Falling back to alternative method"
        )
    try:
        with open("/proc/modules", encoding="utf-8") as modules_file:
            modules = modules_file.read()
        # Match the "nvidia" module itself, not nvidia_drm, nvidia_uvm, etc.
        return bool(re.search(r"^nvidia ", modules, flags=re.MULTILINE))
    except OSError:
        logger.error(
            "Could not access /proc/modules to find the Nvidia drivers. "
            "Nvidia card may not be detected."
        )
    glx_info = GlxInfo()
    # The vendor attribute may be absent when glxinfo produced no usable
    # output; treat that as "not NVIDIA" instead of raising AttributeError.
    # The comparison is case-insensitive, as in get_nvidia_driver_info().
    vendor = getattr(glx_info, "opengl_vendor", None) or ""
    return "nvidia" in vendor.lower()
def get_gpus() -> Iterable[str]:
    """Return GPUs connected to the system

    Yields the names of the /sys/class/drm entries that look like "card<N>".
    Nothing is yielded when /sys/class/drm is missing or unreadable.
    """
    if not os.path.exists("/sys/class/drm"):
        logger.error("No GPU available on this system!")
        # This is a generator: a bare return simply ends the iteration.
        return
    try:
        cardlist = os.listdir("/sys/class/drm/")
    except PermissionError:
        logger.error(
            "Your system does not allow reading from /sys/class/drm, no GPU detected."
        )
        return
    for cardname in cardlist:
        # \d+ so that card10 and above are not silently skipped; connector
        # entries such as "card0-HDMI-A-1" are still excluded.
        if re.match(r"^card\d+$", cardname):
            yield cardname
def get_gpu_info(card):
def get_gpu_info(card: str) -> Dict[str, str]:
"""Return information about a GPU"""
infos = {"DRIVER": "", "PCI_ID": "", "PCI_SUBSYS_ID": ""}
try:
with open("/sys/class/drm/%s/device/uevent" % card, encoding='utf-8') as card_uevent:
with open(
f"/sys/class/drm/{card}/device/uevent", encoding="utf-8"
) as card_uevent:
content = card_uevent.readlines()
except FileNotFoundError:
logger.error("Unable to read driver information for card %s", card)
@ -89,7 +194,7 @@ def get_gpu_info(card):
return infos
def is_amd():
def is_amd() -> bool:
"""Return true if the system uses the AMD driver"""
for card in get_gpus():
if get_gpu_info(card)["DRIVER"] == "amdgpu":
@ -97,22 +202,28 @@ def is_amd():
return False
def check_driver() -> None:
    """Report on the currently running driver

    Logs the NVIDIA driver version and GPU models when the NVIDIA driver is
    in use, then logs the PCI IDs and kernel driver of every card returned
    by get_gpus().
    """
    if is_nvidia():
        # get_nvidia_driver_info() returns an empty dict when the driver
        # details cannot be determined; don't crash on the missing key.
        nvrm_info = (get_nvidia_driver_info() or {}).get("nvrm")
        if nvrm_info:
            logger.info(
                "Using %s drivers %s for %s",
                nvrm_info["vendor"],
                nvrm_info["version"],
                nvrm_info["arch"],
            )
        else:
            logger.warning("Could not retrieve NVIDIA driver information.")
        for gpu_id in get_nvidia_gpu_ids():
            gpu_info = get_nvidia_gpu_info(gpu_id)
            logger.info("GPU: %s", gpu_info.get("Model"))
    for card in get_gpus():
        card_info = get_gpu_info(card)
        logger.info(
            "GPU: %s %s using %s driver",
            card_info["PCI_ID"],
            card_info["PCI_SUBSYS_ID"],
            card_info["DRIVER"],
        )
def is_outdated():
def is_outdated() -> bool:
if not is_nvidia():
return False
driver_info = get_nvidia_driver_info()

View file

@ -0,0 +1,272 @@
import io
import subprocess
import unittest
from unittest.mock import patch
from lutris.util.graphics import drivers, glxinfo
# Sample version-file text for the proprietary NVIDIA kernel module; fed to
# drivers.get_nvidia_driver_info() through a patched open() in test_from_file.
PROPRIETARY_MODULE_VERSION_FILE = """\
NVRM version: NVIDIA UNIX x86_64 Kernel Module 525.105.17 Tue Mar 28 18:02:59 UTC 2023
GCC version: gcc version 11.3.0 (Ubuntu 11.3.0-1ubuntu1~22.04)
"""
# Expected get_nvidia_driver_info() result for PROPRIETARY_MODULE_VERSION_FILE.
PROPRIETARY_MODULE_OUTPUT = {
"nvrm": {
"vendor": "NVIDIA",
"platform": "UNIX",
"arch": "x86_64",
"version": "525.105.17",
"date": "Tue Mar 28 18:02:59 UTC 2023",
}
}
# Sample version-file text for the open NVIDIA kernel module.
OPEN_MODULE_VERSION_FILE = """\
NVRM version: NVIDIA UNIX Open Kernel Module for x86_64 515.43.04 Release Build (archlinux-builder@archlinux)
GCC version: gcc version 12.1.0 (GCC)
"""
# Expected get_nvidia_driver_info() result for OPEN_MODULE_VERSION_FILE
# (no "date" entry for the open module).
OPEN_MODULE_OUTPUT = {
"nvrm": {
"vendor": "NVIDIA",
"platform": "UNIX",
"arch": "x86_64",
"version": "515.43.04",
}
}
# (sub-test label, version file text, expected parsed output) triples
# iterated by TestGetNvidiaDriverInfo.test_from_file.
DRIVER_VERSION_FILES = (
("Proprietary", PROPRIETARY_MODULE_VERSION_FILE, PROPRIETARY_MODULE_OUTPUT),
("Open", OPEN_MODULE_VERSION_FILE, OPEN_MODULE_OUTPUT),
)
# Sample glxinfo output for an NVIDIA GPU, used to build a GlxInfo object
# without running the glxinfo executable.
SAMPLE_GLXINFO_OUTPUT = """\
name of display: :0
display: :0 screen: 0
direct rendering: Yes
Memory info (GL_NVX_gpu_memory_info):
Dedicated video memory: 6144 MB
Total available memory: 6144 MB
Currently available dedicated video memory: 3359 MB
OpenGL vendor string: NVIDIA Corporation
OpenGL renderer string: NVIDIA GeForce GTX 1660 SUPER/PCIe/SSE2
OpenGL core profile version string: 4.6.0 NVIDIA 525.105.17
OpenGL core profile shading language version string: 4.60 NVIDIA
OpenGL core profile context flags: (none)
OpenGL core profile profile mask: core profile
OpenGL version string: 4.6.0 NVIDIA 525.105.17
OpenGL shading language version string: 4.60 NVIDIA
OpenGL context flags: (none)
OpenGL profile mask: (none)
OpenGL ES profile version string: OpenGL ES 3.2 NVIDIA 525.105.17
OpenGL ES profile shading language version string: OpenGL ES GLSL ES 3.20
"""
# GlxInfo instance built from the sample output; the tests return it from a
# patched drivers.GlxInfo to exercise the glxinfo fallbacks.
FAKE_GLXINFO_NVIDIA = glxinfo.GlxInfo(SAMPLE_GLXINFO_OUTPUT)
# Sample per-GPU "information" file text, served through a patched open()
# in TestGetNvidiaGpuInfo.test_get_from_proc.
SAMPLE_GPU_INFORMATION = """\
Model: NVIDIA GeForce GTX 1660 SUPER
IRQ: 35
GPU UUID: GPU-12345678-1234-1234-1234-1234567890ab
Video BIOS: 90.16.48.00.aa
Bus Type: PCIe
DMA Size: 47 bits
DMA Mask: 0x7fffffffffff
Bus Location: 0000:01:00.0
Device Minor: 0
GPU Excluded: No
"""
# Expected drivers.get_nvidia_gpu_info() result for SAMPLE_GPU_INFORMATION.
SAMPLE_GPU_INFO_DICT = {
"Model": "NVIDIA GeForce GTX 1660 SUPER",
"IRQ": "35",
"GPU UUID": "GPU-12345678-1234-1234-1234-1234567890ab",
"Video BIOS": "90.16.48.00.aa",
"Bus Type": "PCIe",
"DMA Size": "47 bits",
"DMA Mask": "0x7fffffffffff",
"Bus Location": "0000:01:00.0",
"Device Minor": "0",
"GPU Excluded": "No",
}
# Sample verbose lspci output for one NVIDIA GPU; returned by the patched
# subprocess.run in TestGetNvidiaGpuInfo.test_get_from_lspci_glxinfo.
# The trailing backslash joins the first two lines into one device header line.
SAMPLE_LSPCI_GPU = """\
01:00.0 VGA compatible controller: NVIDIA Corporation TU116 \
[GeForce GTX 1660 SUPER] (rev a1) (prog-if 00 [VGA controller])
Subsystem: eVga.com. Corp. TU116 [GeForce GTX 1660 SUPER]
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx+
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
Latency: 0
Interrupt: pin A routed to IRQ 35
Region 0: Memory at f6000000 (32-bit, non-prefetchable) [size=16M]
Region 1: Memory at e0000000 (64-bit, prefetchable) [size=256M]
Region 3: Memory at f0000000 (64-bit, prefetchable) [size=32M]
Region 5: I/O ports at e000 [size=128]
Expansion ROM at 000c0000 [virtual] [disabled] [size=128K]
Capabilities: <access denied>
Kernel driver in use: nvidia
Kernel modules: nvidiafb, nouveau, nvidia_drm, nvidia
"""
# Sample /proc/modules text with the nvidia module loaded, served through a
# patched open() in TestIsNvidia.test_is_nvidia_proc_modules. It also contains
# nvidia_* and i2c_nvidia_gpu entries next to the plain "nvidia" line.
SAMPLE_PROC_MODULES = """\
nvidia_uvm 1384448 2 - Live 0x0000000000000000 (POE)
nvidia_drm 69632 26 - Live 0x0000000000000000 (POE)
nvidia_modeset 1241088 67 nvidia_drm, Live 0x0000000000000000 (POE)
uvcvideo 114688 0 - Live 0x0000000000000000
videobuf2_vmalloc 20480 1 uvcvideo, Live 0x0000000000000000
videobuf2_memops 20480 1 videobuf2_vmalloc, Live 0x0000000000000000
videobuf2_v4l2 32768 1 uvcvideo, Live 0x0000000000000000
nvidia 56500224 3776 nvidia_uvm,nvidia_modeset, Live 0x0000000000000000 (POE)
videobuf2_common 81920 4 uvcvideo,videobuf2_vmalloc,videobuf2_memops,videobuf2_v4l2, Live 0x0000000000000000
videodev 274432 3 uvcvideo,videobuf2_v4l2,videobuf2_common, Live 0x0000000000000000
mc 65536 5 uvcvideo,videobuf2_v4l2,snd_usb_audio,videobuf2_common,videodev, Live 0x0000000000000000
drm_kms_helper 200704 1 nvidia_drm, Live 0x0000000000000000
drm 581632 30 nvidia_drm,nvidia,drm_kms_helper, Live 0x0000000000000000
i2c_nvidia_gpu 16384 0 - Live 0x0000000000000000
i2c_ccgx_ucsi 16384 1 i2c_nvidia_gpu, Live 0x0000000000000000
video 65536 0 - Live 0x0000000000000000
"""
class TestGetNvidiaDriverInfo(unittest.TestCase):
    """Tests for drivers.get_nvidia_driver_info()."""

    def test_success_on_current_machine(self):
        """Smoke test: the unpatched call must not raise on this machine."""
        drivers.get_nvidia_driver_info()

    @patch("os.path.exists", return_value=False)
    def test_returns_empty_dict_if_file_doesnt_exist(self, mock_path_exists):
        """A missing version file yields an empty dict (not None)."""
        self.assertEqual(drivers.get_nvidia_driver_info(), {})

    @patch("builtins.open")
    @patch("os.path.exists", return_value=True)
    def test_from_file(self, mock_path_exists, mock_open):
        """Both known version-file layouts are parsed into the expected dict."""
        for test_type, version_file, expected in DRIVER_VERSION_FILES:
            with self.subTest(test_type):
                mock_open.return_value = io.StringIO(version_file)
                actual = drivers.get_nvidia_driver_info()
                self.assertEqual(actual, expected)

    @patch(
        "builtins.open",
        side_effect=PermissionError(
            13, "Permission Denied: '/proc/driver/nvidia/version'"
        ),
    )
    @patch("os.path.exists", return_value=True)
    def test_file_errors(self, mock_path_exists, mock_open):
        """An unreadable version file falls back to glxinfo and uname."""
        # subprocess.run is consumed twice, in order: the platform lookup
        # first, then the architecture lookup.
        with patch.object(drivers, "GlxInfo", return_value=FAKE_GLXINFO_NVIDIA), patch.object(
            subprocess,
            "run",
            side_effect=[
                subprocess.CompletedProcess([], 0, stdout="Linux\n"),
                subprocess.CompletedProcess([], 0, stdout="x86_64\n"),
            ],
        ):
            actual = drivers.get_nvidia_driver_info()
        self.assertEqual(
            actual,
            {
                "nvrm": {
                    "vendor": "NVIDIA Corporation",
                    "platform": "Linux",
                    "arch": "x86_64",
                    "version": "525.105.17",
                }
            },
        )
class TestGetNvidiaGpuIds(unittest.TestCase):
    """Tests for drivers.get_nvidia_gpu_ids()."""

    sample_gpu_list = ["0000:01:00.0"]

    def test_get_from_proc(self):
        """The directory listing is returned as-is when it is readable."""
        with patch("os.listdir", return_value=self.sample_gpu_list):
            gpu_ids = drivers.get_nvidia_gpu_ids()
        self.assertEqual(gpu_ids, self.sample_gpu_list)

    def test_get_from_lspci(self):
        """A PermissionError on the listing falls back to parsing lspci."""
        lspci_result = subprocess.CompletedProcess(
            args=["lspci", "-D", "-n", "-d", "10de::0300"],
            returncode=0,
            stdout="0000:01:00.0 0300: 10de:21c4 (rev a1)\n",
        )
        with patch("os.listdir", side_effect=PermissionError()), patch(
            "subprocess.run", return_value=lspci_result
        ):
            gpu_ids = drivers.get_nvidia_gpu_ids()
        self.assertEqual(gpu_ids, self.sample_gpu_list)
class TestGetNvidiaGpuInfo(unittest.TestCase):
    """Tests for drivers.get_nvidia_gpu_info()."""

    sample_gpu_id = "0000:01:00.0"

    @patch("builtins.open", return_value=io.StringIO(SAMPLE_GPU_INFORMATION))
    def test_get_from_proc(self, mock_open):
        """A readable information file is parsed into a key/value dict."""
        result = drivers.get_nvidia_gpu_info(self.sample_gpu_id)
        self.assertEqual(result, SAMPLE_GPU_INFO_DICT)

    @patch(
        "subprocess.run",
        return_value=subprocess.CompletedProcess(
            [], 0,
            stdout=SAMPLE_LSPCI_GPU
        )
    )
    @patch("builtins.open", side_effect=PermissionError())
    def test_get_from_lspci_glxinfo(self, mock_open, mock_lspci):
        """An unreadable information file falls back to parsing lspci output."""
        result = drivers.get_nvidia_gpu_info(self.sample_gpu_id)
        expected_subset = {
            "Model": "NVIDIA GeForce GTX 1660 SUPER",
            "IRQ": "35",
            "Bus Location": self.sample_gpu_id,
            "Subsystem": "eVga.com. Corp. TU116 [GeForce GTX 1660 SUPER]",
            "Interrupt": "pin A routed to IRQ 35",
            "Region 0": "Memory at f6000000 (32-bit, non-prefetchable) [size=16M]",
            "Region 1": "Memory at e0000000 (64-bit, prefetchable) [size=256M]",
            "Region 3": "Memory at f0000000 (64-bit, prefetchable) [size=32M]",
            "Region 5": "I/O ports at e000 [size=128]",
            "Kernel driver in use": "nvidia",
        }
        # assertDictContainsSubset was removed in Python 3.12. Overlaying the
        # expected pairs on the result leaves it unchanged only if every
        # expected pair is already present with the same value.
        self.assertEqual(result, {**result, **expected_subset})
class TestIsNvidia(unittest.TestCase):
    """Tests for drivers.is_nvidia() and its fallback detection steps."""

    def test_success_on_current_machine(self):
        """Smoke test: the unpatched call returns a bool on this machine."""
        detected = drivers.is_nvidia()
        self.assertIsInstance(detected, bool)

    def test_not_nvidia_by_directory(self):
        """No /proc/driver/nvidia directory means no NVIDIA driver."""
        with patch("os.path.exists", return_value=False):
            detected = drivers.is_nvidia()
        self.assertFalse(detected)

    def test_not_nvidia_proc_modules(self):
        """An empty module list means no NVIDIA driver."""
        with patch("os.path.exists", side_effect=PermissionError()), patch(
            "builtins.open", return_value=io.StringIO("")
        ):
            detected = drivers.is_nvidia()
        self.assertFalse(detected)

    # TODO: Add AMD GLX info and uncomment this test.
    # @patch.object(drivers, "GlxInfo", return_value=FAKE_GLXINFO_AMD)
    # @patch("builtins.open", side_effect=PermissionError())
    # @patch("os.path.exists", side_effect=PermissionError())
    # def test_not_nvidia_glxinfo(self, mock_exists, mock_open, mock_glxinfo):
    #     self.assertFalse(drivers.is_nvidia())

    def test_is_nvidia_by_directory(self):
        """An existing /proc/driver/nvidia directory means NVIDIA is in use."""
        with patch("os.path.exists", return_value=True):
            detected = drivers.is_nvidia()
        self.assertTrue(detected)

    def test_is_nvidia_proc_modules(self):
        """The nvidia line in the module list is detected."""
        with patch("os.path.exists", side_effect=PermissionError()), patch(
            "builtins.open", return_value=io.StringIO(SAMPLE_PROC_MODULES)
        ):
            detected = drivers.is_nvidia()
        self.assertTrue(detected)

    def test_is_nvidia_glxinfo(self):
        """With /proc unreadable, the glxinfo vendor string decides."""
        with patch("os.path.exists", side_effect=PermissionError()), patch(
            "builtins.open", side_effect=PermissionError()
        ), patch.object(drivers, "GlxInfo", return_value=FAKE_GLXINFO_NVIDIA):
            detected = drivers.is_nvidia()
        self.assertTrue(detected)