qemu/qapi/qom.json

# -*- Mode: Python -*-
# vim: filetype=python
#
# This work is licensed under the terms of the GNU GPL, version 2 or later.
# See the COPYING file in the top-level directory.
{ 'include': 'authz.json' }
{ 'include': 'block-core.json' }
{ 'include': 'common.json' }
{ 'include': 'crypto.json' }
##
# = QEMU Object Model (QOM)
##
##
# @ObjectPropertyInfo:
#
# @name: the name of the property
#
# @type: the type of the property. This will typically come in one of
# three forms:
#
# 1) A primitive type such as 'u8', 'u16', 'bool', 'str', or
# 'double'. These types are mapped to the appropriate JSON
# type.
#
# 2) A child type in the form 'child<subtype>' where subtype is a
# qdev device type name. Child properties create the
# composition tree.
#
# 3) A link type in the form 'link<subtype>' where subtype is a
# qdev device type name. Link properties form the device model
# graph.
#
# @description: if specified, the description of the property.
#
# @default-value: the default value, if any (since 5.0)
#
# Since: 1.2
##
{ 'struct': 'ObjectPropertyInfo',
'data': { 'name': 'str',
'type': 'str',
'*description': 'str',
'*default-value': 'any' } }
##
# @qom-list:
#
# This command will list any properties of an object given a path in
# the object model.
#
# @path: the path within the object model. See @qom-get for a
# description of this parameter.
#
# Returns: a list of @ObjectPropertyInfo that describe the properties
# of the object.
#
# Since: 1.2
#
# Example:
#
# -> { "execute": "qom-list",
# "arguments": { "path": "/chardevs" } }
# <- { "return": [ { "name": "type", "type": "string" },
# { "name": "parallel0", "type": "child<chardev-vc>" },
# { "name": "serial0", "type": "child<chardev-vc>" },
# { "name": "mon0", "type": "child<chardev-stdio>" } ] }
##
{ 'command': 'qom-list',
'data': { 'path': 'str' },
'returns': [ 'ObjectPropertyInfo' ],
'allow-preconfig': true }
##
# @qom-get:
#
# This command will get a property from an object model path and return
# the value.
#
# @path: The path within the object model. There are two forms of
# supported paths--absolute and partial paths.
#
# Absolute paths are derived from the root object and can follow
# child<> or link<> properties. Since they can follow link<>
# properties, they can be arbitrarily long. Absolute paths look
# like absolute filenames and are prefixed with a leading slash.
#
# Partial paths look like relative filenames. They do not begin
# with a prefix. The matching rules for partial paths are subtle
# but designed to make specifying objects easy. At each level of
# the composition tree, the partial path is matched as an absolute
# path. The first match is not returned immediately; at least two
# matches are searched for. A successful result is only returned
# if exactly one match is found. If more than one match is found,
# a flag is returned to indicate that the match was ambiguous.
#
# @property: The property name to read
#
# Returns: The property value. The type depends on the property type.
# child<> and link<> properties are returned as #str pathnames.
# All integer property types (u8, u16, etc) are returned as #int.
#
# Since: 1.2
#
# Examples:
#
# 1. Use absolute path
#
# -> { "execute": "qom-get",
# "arguments": { "path": "/machine/unattached/device[0]",
# "property": "hotplugged" } }
# <- { "return": false }
#
# 2. Use partial path
#
# -> { "execute": "qom-get",
# "arguments": { "path": "unattached/sysbus",
# "property": "type" } }
# <- { "return": "System" }
##
{ 'command': 'qom-get',
'data': { 'path': 'str', 'property': 'str' },
'returns': 'any',
'allow-preconfig': true }
##
# @qom-set:
#
# This command will set a property on an object model path.
#
# @path: see @qom-get for a description of this parameter
#
# @property: the property name to set
#
# @value: a value whose type is appropriate for the property type.
# See @qom-get for a description of type mapping.
#
# Since: 1.2
#
# Example:
#
# -> { "execute": "qom-set",
# "arguments": { "path": "/machine",
# "property": "graphics",
# "value": false } }
# <- { "return": {} }
##
{ 'command': 'qom-set',
'data': { 'path': 'str', 'property': 'str', 'value': 'any' },
'allow-preconfig': true }
##
# @ObjectTypeInfo:
#
# This structure describes a search result from @qom-list-types
#
# @name: the type name found in the search
#
# @abstract: the type is abstract and can't be directly instantiated.
# Omitted if false. (since 2.10)
#
# @parent: Name of parent type, if any (since 2.10)
#
# Since: 1.1
##
{ 'struct': 'ObjectTypeInfo',
'data': { 'name': 'str', '*abstract': 'bool', '*parent': 'str' } }
##
# @qom-list-types:
#
# This command will return a list of types given search parameters
#
# @implements: if specified, only return types that implement this
# type name
#
# @abstract: if true, include abstract types in the results
#
# Returns: a list of @ObjectTypeInfo or an empty list if no results
# are found
#
# Since: 1.1
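#
# Example (illustrative; the returned set of types depends on the
# QEMU build and target):
#
# -> { "execute": "qom-list-types",
#      "arguments": { "implements": "event-loop-base",
#                     "abstract": true } }
# <- { "return": [ { "name": "event-loop-base", "abstract": true,
#                    "parent": "object" },
#                  { "name": "iothread", "parent": "event-loop-base" },
#                  { "name": "main-loop", "parent": "event-loop-base" } ] }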
##
{ 'command': 'qom-list-types',
'data': { '*implements': 'str', '*abstract': 'bool' },
'returns': [ 'ObjectTypeInfo' ],
'allow-preconfig': true }
##
# @qom-list-properties:
#
# List properties associated with a QOM object.
#
# @typename: the type name of an object
#
# Note: objects can create properties at runtime, for example to
# describe links between different devices and/or objects. These
# properties are not included in the output of this command.
#
# Returns: a list of ObjectPropertyInfo describing object properties
#
# Since: 2.12
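#
# Example (illustrative; the property list varies between QEMU
# versions):
#
# -> { "execute": "qom-list-properties",
#      "arguments": { "typename": "iothread" } }
# <- { "return": [ { "name": "poll-max-ns", "type": "int" },
#                  { "name": "poll-grow", "type": "int" },
#                  { "name": "poll-shrink", "type": "int" } ] }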
##
{ 'command': 'qom-list-properties',
'data': { 'typename': 'str'},
'returns': [ 'ObjectPropertyInfo' ],
'allow-preconfig': true }
##
# @CanHostSocketcanProperties:
#
# Properties for can-host-socketcan objects.
#
# @if: interface name of the host system CAN bus to connect to
#
# @canbus: object ID of the can-bus object to connect to the host
# interface
#
# Since: 2.12
##
{ 'struct': 'CanHostSocketcanProperties',
'data': { 'if': 'str',
'canbus': 'str' } }
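# Example (illustrative sketch; the host interface name and object
# IDs are assumptions):
#
#   -object can-bus,id=canbus0
#   -object can-host-socketcan,id=canhost0,if=can0,canbus=canbus0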
##
# @ColoCompareProperties:
#
# Properties for colo-compare objects.
#
# @primary_in: name of the character device backend to use for the
# primary input (incoming packets are redirected to @outdev)
#
# @secondary_in: name of the character device backend to use for
# secondary input (incoming packets are only compared to the input
# on @primary_in and then dropped)
#
# @outdev: name of the character device backend to use for output
#
# @iothread: name of the iothread to run in
#
# @notify_dev: name of the character device backend to be used to
# communicate with the remote colo-frame (only for Xen COLO)
#
# @compare_timeout: the maximum time to hold a packet from @primary_in
# for comparison with an incoming packet on @secondary_in in
# milliseconds (default: 3000)
#
# @expired_scan_cycle: the interval at which colo-compare checks
# whether packets from @primary_in have timed out, in milliseconds
# (default: 3000)
#
# @max_queue_size: the maximum number of packets to keep in the queue
# for comparing with incoming packets from @secondary_in. If the
# queue is full and additional packets are received, the
# additional packets are dropped. (default: 1024)
#
# @vnet_hdr_support: if true, vnet header support is enabled
# (default: false)
#
# Since: 2.8
##
{ 'struct': 'ColoCompareProperties',
'data': { 'primary_in': 'str',
'secondary_in': 'str',
'outdev': 'str',
'iothread': 'str',
'*notify_dev': 'str',
'*compare_timeout': 'uint64',
'*expired_scan_cycle': 'uint32',
'*max_queue_size': 'uint32',
'*vnet_hdr_support': 'bool' } }
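# Example (illustrative sketch of a COLO primary-side setup; all
# chardev and object IDs are assumptions):
#
#   -object iothread,id=iothread1
#   -object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0,iothread=iothread1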
##
# @CryptodevBackendProperties:
#
# Properties for cryptodev-backend and cryptodev-backend-builtin
# objects.
#
# @queues: the number of queues for the cryptodev backend. Ignored
# for cryptodev-backend and must be 1 for
# cryptodev-backend-builtin. (default: 1)
#
# @throttle-bps: limit total bytes per second (Since 8.0)
#
# @throttle-ops: limit total operations per second (Since 8.0)
#
# Since: 2.8
##
{ 'struct': 'CryptodevBackendProperties',
'data': { '*queues': 'uint32',
'*throttle-bps': 'uint64',
'*throttle-ops': 'uint64' } }
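# Example (illustrative sketch pairing the backend with a
# virtio-crypto device):
#
#   -object cryptodev-backend-builtin,id=cryptodev0
#   -device virtio-crypto-pci,id=crypto0,cryptodev=cryptodev0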
##
# @CryptodevVhostUserProperties:
#
# Properties for cryptodev-vhost-user objects.
#
# @chardev: the name of a Unix domain socket character device that
# connects to the vhost-user server
#
# Since: 2.12
##
{ 'struct': 'CryptodevVhostUserProperties',
'base': 'CryptodevBackendProperties',
'data': { 'chardev': 'str' } }
##
# @DBusVMStateProperties:
#
# Properties for dbus-vmstate objects.
#
# @addr: the name of the DBus bus to connect to
#
# @id-list: a comma-separated list of DBus IDs of helpers whose data
# should be included in the VM state on migration
#
# Since: 5.0
##
{ 'struct': 'DBusVMStateProperties',
'data': { 'addr': 'str' ,
'*id-list': 'str' } }
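# Example (illustrative sketch; the DBus socket path and helper ID
# are assumptions):
#
#   -object dbus-vmstate,id=dv0,addr=unix:path=/tmp/dbus-vmstate,id-list=helper0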
##
# @NetfilterInsert:
#
# Indicates where to insert a netfilter relative to a given other
# filter.
#
# @before: insert before the specified filter
#
# @behind: insert behind the specified filter
#
# Since: 5.0
##
{ 'enum': 'NetfilterInsert',
'data': [ 'before', 'behind' ] }
##
# @NetfilterProperties:
#
# Properties for objects of classes derived from netfilter.
#
# @netdev: id of the network device backend to filter
#
# @queue: indicates which queue(s) to filter (default: all)
#
# @status: indicates whether the filter is enabled ("on") or disabled
# ("off") (default: "on")
#
# @position: specifies where the filter should be inserted in the
# filter list. "head" means the filter is inserted at the head of
# the filter list, before any existing filters. "tail" means the
# filter is inserted at the tail of the filter list, behind any
# existing filters (default). "id=<id>" means the filter is
# inserted before or behind the filter specified by <id>,
# depending on the @insert property. (default: "tail")
#
# @insert: where to insert the filter relative to the filter given in
# @position. Ignored if @position is "head" or "tail".
# (default: behind)
#
# Since: 2.5
##
{ 'struct': 'NetfilterProperties',
'data': { 'netdev': 'str',
'*queue': 'NetFilterDirection',
'*status': 'str',
'*position': 'str',
'*insert': 'NetfilterInsert' } }
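# Example (illustrative sketch of @position and @insert: place a dump
# filter behind an existing filter "f1"; IDs and the file path are
# assumptions):
#
#   -object filter-dump,id=f2,netdev=hn0,file=/tmp/hn0.pcap,position=id=f1,insert=behind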
##
# @FilterBufferProperties:
#
# Properties for filter-buffer objects.
#
# @interval: a non-zero interval in microseconds. All packets
# arriving in the given interval are delayed until the end of the
# interval.
#
# Since: 2.5
##
{ 'struct': 'FilterBufferProperties',
'base': 'NetfilterProperties',
'data': { 'interval': 'uint32' } }
##
# @FilterDumpProperties:
#
# Properties for filter-dump objects.
#
# @file: the filename where the dumped packets should be stored
#
# @maxlen: maximum number of bytes in a packet that are stored
# (default: 65536)
#
# Since: 2.5
##
{ 'struct': 'FilterDumpProperties',
'base': 'NetfilterProperties',
'data': { 'file': 'str',
'*maxlen': 'uint32' } }
##
# @FilterMirrorProperties:
#
# Properties for filter-mirror objects.
#
# @outdev: the name of a character device backend to which all
# incoming packets are mirrored
#
# @vnet_hdr_support: if true, vnet header support is enabled
# (default: false)
#
# Since: 2.6
##
{ 'struct': 'FilterMirrorProperties',
'base': 'NetfilterProperties',
'data': { 'outdev': 'str',
'*vnet_hdr_support': 'bool' } }
##
# @FilterRedirectorProperties:
#
# Properties for filter-redirector objects.
#
# At least one of @indev or @outdev must be present. If both are
# present, they must not refer to the same character device backend.
#
# @indev: the name of a character device backend from which packets
# are received and redirected to the filtered network device
#
# @outdev: the name of a character device backend to which all
# incoming packets are redirected
#
# @vnet_hdr_support: if true, vnet header support is enabled
# (default: false)
#
# Since: 2.6
##
{ 'struct': 'FilterRedirectorProperties',
'base': 'NetfilterProperties',
'data': { '*indev': 'str',
'*outdev': 'str',
'*vnet_hdr_support': 'bool' } }
##
# @FilterRewriterProperties:
#
# Properties for filter-rewriter objects.
#
# @vnet_hdr_support: if true, vnet header support is enabled
# (default: false)
#
# Since: 2.8
##
{ 'struct': 'FilterRewriterProperties',
'base': 'NetfilterProperties',
'data': { '*vnet_hdr_support': 'bool' } }
##
# @InputBarrierProperties:
#
# Properties for input-barrier objects.
#
# @name: the screen name as declared in the screens section of
# barrier.conf
#
# @server: hostname of the Barrier server (default: "localhost")
#
# @port: TCP port of the Barrier server (default: "24800")
#
# @x-origin: x coordinate of the leftmost pixel on the guest screen
# (default: "0")
#
# @y-origin: y coordinate of the topmost pixel on the guest screen
# (default: "0")
#
# @width: the width of secondary screen in pixels (default: "1920")
#
# @height: the height of secondary screen in pixels (default: "1080")
#
# Since: 4.2
##
{ 'struct': 'InputBarrierProperties',
'data': { 'name': 'str',
'*server': 'str',
'*port': 'str',
'*x-origin': 'str',
'*y-origin': 'str',
'*width': 'str',
'*height': 'str' } }
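# Example (illustrative sketch; the screen name and server address
# are assumptions):
#
#   -object input-barrier,id=barrier0,name=VM-1,server=192.168.0.100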
##
# @InputLinuxProperties:
#
# Properties for input-linux objects.
#
# @evdev: the path of the host evdev device to use
#
# @grab_all: if true, grab is toggled for all devices (e.g. both
# keyboard and mouse) instead of just one device (default: false)
#
# @repeat: enables auto-repeat events (default: false)
#
# @grab-toggle: the key or key combination that toggles device grab
# (default: ctrl-ctrl)
#
# Since: 2.6
##
{ 'struct': 'InputLinuxProperties',
'data': { 'evdev': 'str',
'*grab_all': 'bool',
'*repeat': 'bool',
'*grab-toggle': 'GrabToggleKeys' } }
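# Example (illustrative sketch; the evdev path is an assumption):
#
#   -object input-linux,id=kbd0,evdev=/dev/input/event3,grab_all=on,repeat=on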
##
# @EventLoopBaseProperties:
#
# Common properties for event loops
#
# @aio-max-batch: maximum number of requests in a batch for the AIO
# engine, 0 means that the engine will use its default.
# (default: 0)
#
# @thread-pool-min: minimum number of threads reserved in the thread
# pool (default: 0)
#
# @thread-pool-max: maximum number of threads the thread pool can
# contain (default: 64)
#
# Since: 7.1
##
{ 'struct': 'EventLoopBaseProperties',
'data': { '*aio-max-batch': 'int',
'*thread-pool-min': 'int',
'*thread-pool-max': 'int' } }
##
# @IothreadProperties:
#
# Properties for iothread objects.
#
# @poll-max-ns: the maximum number of nanoseconds to busy wait for
# events. 0 means polling is disabled (default: 32768 on POSIX
# hosts, 0 otherwise)
#
# @poll-grow: the multiplier used to increase the polling time when
# the algorithm detects it is missing events due to not polling
# long enough. 0 selects a default behaviour (default: 0)
#
# @poll-shrink: the divisor used to decrease the polling time when the
# algorithm detects it is spending too long polling without
# encountering events. 0 selects a default behaviour (default: 0)
#
# The @aio-max-batch option is available since 6.1.
#
# Since: 2.0
##
{ 'struct': 'IothreadProperties',
'base': 'EventLoopBaseProperties',
'data': { '*poll-max-ns': 'int',
'*poll-grow': 'int',
'*poll-shrink': 'int' } }
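# Example (illustrative sketch): creating an iothread at runtime with
# the object-add command; the ID and tuning values are arbitrary:
#
# -> { "execute": "object-add",
#      "arguments": { "qom-type": "iothread", "id": "iothread1",
#                     "poll-max-ns": 100000, "thread-pool-max": 32 } }
# <- { "return": {} }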
##
# @MainLoopProperties:
#
# Properties for the main-loop object.
#
# Since: 7.1
##
{ 'struct': 'MainLoopProperties',
'base': 'EventLoopBaseProperties',
'data': {} }
##
# @MemoryBackendProperties:
#
# Properties for objects of classes derived from memory-backend.
#
# @merge: if true, mark the memory as mergeable (default depends on
# the machine type)
#
# @dump: if true, include the memory in core dumps (default depends on
# the machine type)
#
# @host-nodes: the list of NUMA host nodes to bind the memory to
#
# @policy: the NUMA policy (default: 'default')
#
# @prealloc: if true, preallocate memory (default: false)
#
# @prealloc-threads: number of CPU threads to use for prealloc
# (default: 1)
#
# @prealloc-context: thread context to use for creation of
# preallocation threads (default: none) (since 7.2)
#
# @share: if false, the memory is private to QEMU; if true, it is
# shared (default: false)
#
# @reserve: if true, reserve swap space (or huge pages) if applicable
# (default: true) (since 6.1)
#
# @size: size of the memory region in bytes
#
# @x-use-canonical-path-for-ramblock-id: if true, the canonical path
# is used for ramblock-id. Disable this for 4.0 machine types or
# older to allow migration with newer QEMU versions.
# (default: false generally, but true for machine types <= 4.0)
#
# Note: prealloc=true and reserve=false cannot be set at the same
# time. With reserve=true, the behavior depends on the operating
# system: for example, Linux will not reserve swap space for
# shared file mappings -- "not applicable". In contrast,
# reserve=false will bail out if it cannot be configured
# accordingly.
#
# Since: 2.1
##
{ 'struct': 'MemoryBackendProperties',
'data': { '*dump': 'bool',
'*host-nodes': ['uint16'],
'*merge': 'bool',
'*policy': 'HostMemPolicy',
'*prealloc': 'bool',
'*prealloc-threads': 'uint32',
'*prealloc-context': 'str',
'*share': 'bool',
'*reserve': 'bool',
'size': 'size',
'*x-use-canonical-path-for-ramblock-id': 'bool' } }
##
# @MemoryBackendFileProperties:
#
# Properties for memory-backend-file objects.
#
# @align: the base address alignment when QEMU mmap(2)s @mem-path.
# Some backend stores specified by @mem-path require an alignment
# different than the default one used by QEMU, e.g. the device DAX
# /dev/dax0.0 requires 2M alignment rather than 4K. In such cases,
# users can specify the required alignment via this option. 0
# selects a default alignment (currently the page size).
# (default: 0)
#
# @offset: the offset into the target file that the region starts at.
# You can use this option to back multiple regions with a single
# file. Must be a multiple of the page size.
# (default: 0) (since 8.1)
#
# @discard-data: if true, the file contents can be destroyed when QEMU
# exits, to avoid unnecessarily flushing data to the backing file.
# Note that @discard-data is only an optimization, and QEMU might
# not discard file contents if it aborts unexpectedly or is
# terminated using SIGKILL. (default: false)
#
# @mem-path: the path to either a shared memory or huge page
# filesystem mount
#
# @pmem: specifies whether the backing file specified by @mem-path is
# in host persistent memory that can be accessed using the SNIA
# NVM programming model (e.g. Intel NVDIMM).
#
# @readonly: if true, the backing file is opened read-only; if false,
# it is opened read-write. (default: false)
#
# @rom: whether to create Read Only Memory (ROM) that cannot be
# modified by the VM. Any write attempts to such ROM will be
# denied. Most use cases want writable RAM instead of ROM.
# However, selected use cases, like R/O NVDIMMs, can benefit from
# ROM. If set to 'on', create ROM; if set to 'off', create
# writable RAM; if set to 'auto', the value of the @readonly
# property is used. This property is primarily helpful when we
# want to have proper RAM in configurations that would
# traditionally create ROM before this property was introduced: VM
# templating, where we want to open a file readonly (@readonly set
# to true) and mark the memory to be private for QEMU (@share set
# to false). For this use case, we need writable RAM instead of
# ROM, and want to set this property to 'off'. (default: auto,
# since 8.2)
#
# Since: 2.1
##
{ 'struct': 'MemoryBackendFileProperties',
'base': 'MemoryBackendProperties',
'data': { '*align': 'size',
'*offset': 'size',
'*discard-data': 'bool',
'mem-path': 'str',
'*pmem': { 'type': 'bool', 'if': 'CONFIG_LIBPMEM' },
'*readonly': 'bool',
'*rom': 'OnOffAuto' } }
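# Example (illustrative sketch; paths and sizes are assumptions).
# The second line shows the VM-templating case described under @rom,
# opening the file read-only while still getting writable RAM:
#
#   -object memory-backend-file,id=mem0,size=4G,mem-path=/dev/hugepages,share=on
#   -object memory-backend-file,id=mem1,size=4G,mem-path=template.img,share=off,readonly=on,rom=off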
##
# @MemoryBackendMemfdProperties:
#
# Properties for memory-backend-memfd objects.
#
# The @share boolean option is true by default with memfd.
#
# @hugetlb: if true, the file to be created resides in the hugetlbfs
# filesystem (default: false)
#
# @hugetlbsize: the hugetlb page size on systems that support multiple
# hugetlb page sizes (it must be a power of 2 value supported by
# the system). 0 selects a default page size. This option is
# ignored if @hugetlb is false. (default: 0)
#
# @seal: if true, create a sealed-file, which will block further
# resizing of the memory (default: true)
#
# Since: 2.12
##
{ 'struct': 'MemoryBackendMemfdProperties',
'base': 'MemoryBackendProperties',
'data': { '*hugetlb': 'bool',
'*hugetlbsize': 'size',
'*seal': 'bool' } }
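# Example (illustrative sketch; the size values are assumptions):
#
#   -object memory-backend-memfd,id=mem0,size=2G,hugetlb=on,hugetlbsize=2M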
##
# @MemoryBackendEpcProperties:
#
# Properties for memory-backend-epc objects.
#
# The @share boolean option is true by default with epc.
#
# The @merge boolean option is false by default with epc.
#
# The @dump boolean option is false by default with epc.
#
# Since: 6.2
##
{ 'struct': 'MemoryBackendEpcProperties',
'base': 'MemoryBackendProperties',
'data': {} }
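# Example (illustrative sketch for an SGX guest; the size is an
# assumption):
#
#   -object memory-backend-epc,id=mem1,size=64M,prealloc=on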
##
# @PrManagerHelperProperties:
#
# Properties for pr-manager-helper objects.
#
# @path: the path to a Unix domain socket for connecting to the
# external helper
#
# Since: 2.11
##
{ 'struct': 'PrManagerHelperProperties',
'data': { 'path': 'str' } }
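# Example (illustrative sketch; the socket path is an assumption):
#
#   -object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock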
##
# @QtestProperties:
#
# Properties for qtest objects.
#
# @chardev: the chardev to be used to receive qtest commands on.
#
# @log: the path to a log file
#
# Since: 6.0
##
{ 'struct': 'QtestProperties',
'data': { 'chardev': 'str',
'*log': 'str' } }
##
# @RemoteObjectProperties:
#
# Properties for x-remote-object objects.
#
# @fd: file descriptor name previously passed via 'getfd' command
#
# @devid: the id of the device to be associated with the file
# descriptor
#
# Since: 6.0
##
{ 'struct': 'RemoteObjectProperties',
'data': { 'fd': 'str', 'devid': 'str' } }
##
# @VfioUserServerProperties:
#
# Properties for x-vfio-user-server objects.
#
# @socket: socket to be used by the libvfio-user library
#
# @device: the ID of the device to be emulated at the server
#
# Since: 7.1
##
{ 'struct': 'VfioUserServerProperties',
'data': { 'socket': 'SocketAddress', 'device': 'str' } }
##
# @IOMMUFDProperties:
#
# Properties for iommufd objects.
#
# @fd: file descriptor name previously passed via 'getfd' command,
# which represents a pre-opened /dev/iommu. This allows the
# iommufd object to be shared across several subsystems (VFIO,
# VDPA, ...), and the file descriptor to be shared with other
# processes, e.g. DPDK. (default: QEMU opens /dev/iommu by itself)
#
# Since: 9.0
##
{ 'struct': 'IOMMUFDProperties',
'data': { '*fd': 'str' } }
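# Example (illustrative sketch; the VFIO device address is an
# assumption):
#
#   -object iommufd,id=iommufd0
#   -device vfio-pci,host=0000:02:00.0,iommufd=iommufd0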
##
# @AcpiGenericInitiatorProperties:
#
# Properties for acpi-generic-initiator objects.
#
# @pci-dev: PCI device ID to be associated with the node
#
# @node: NUMA node associated with the PCI device
#
# Since: 9.0
##
{ 'struct': 'AcpiGenericInitiatorProperties',
'data': { 'pci-dev': 'str',
'node': 'uint32' } }
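# Example (illustrative sketch): associate NUMA node 2 with a VFIO
# device; the host address and IDs are assumptions:
#
#   -numa node,nodeid=2
#   -device vfio-pci-nohotplug,host=0009:01:00.0,bus=pcie.0,addr=04.0,rombar=0,id=dev0
#   -object acpi-generic-initiator,id=gi0,pci-dev=dev0,node=2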
##
# @RngProperties:
#
# Properties for objects of classes derived from rng.
#
# @opened: if true, the device is opened immediately when applying
# this option and will probably fail when processing the next
# option. Don't use; only provided for compatibility.
# (default: false)
#
# Features:
#
# @deprecated: Member @opened is deprecated. Setting true doesn't
# make sense, and false is already the default.
#
# Since: 1.3
##
{ 'struct': 'RngProperties',
'data': { '*opened': { 'type': 'bool', 'features': ['deprecated'] } } }
##
# @RngEgdProperties:
#
# Properties for rng-egd objects.
#
# @chardev: the name of a character device backend that provides the
# connection to the RNG daemon
#
# Since: 1.3
##
{ 'struct': 'RngEgdProperties',
'base': 'RngProperties',
'data': { 'chardev': 'str' } }
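# Example (illustrative sketch; the EGD socket details are
# assumptions):
#
#   -chardev socket,id=chr0,host=localhost,port=1024
#   -object rng-egd,id=rng0,chardev=chr0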
##
# @RngRandomProperties:
#
# Properties for rng-random objects.
#
# @filename: the filename of the device on the host to obtain entropy
# from (default: "/dev/urandom")
#
# Since: 1.3
##
{ 'struct': 'RngRandomProperties',
'base': 'RngProperties',
'data': { '*filename': 'str' } }
##
# @SevGuestProperties:
#
# Properties for sev-guest objects.
#
# @sev-device: SEV device to use (default: "/dev/sev")
#
# @dh-cert-file: guest owner's DH certificate (encoded with base64)
#
# @session-file: guest owner's session parameters (encoded with base64)
#
# @policy: SEV policy value (default: 0x1)
#
# @handle: SEV firmware handle (default: 0)
#
# @cbitpos: C-bit location in page table entry (default: 0)
#
# @reduced-phys-bits: number of bits in physical addresses that become
# unavailable when SEV is enabled
#
# @kernel-hashes: if true, add hashes of kernel/initrd/cmdline to a
# designated guest firmware page for measured boot with -kernel
# (default: false) (since 6.2)
#
# @legacy-vm-type: Use legacy KVM_SEV_INIT KVM interface for creating the VM.
# The newer KVM_SEV_INIT2 interface syncs additional vCPU
# state when initializing the VMSA structures, which will
# result in a different guest measurement. Set this to
# maintain compatibility with older QEMU or kernel versions
# that rely on legacy KVM_SEV_INIT behavior.
# (default: false) (since 9.1)
#
# Since: 2.12
##
{ 'struct': 'SevGuestProperties',
'data': { '*sev-device': 'str',
'*dh-cert-file': 'str',
'*session-file': 'str',
'*policy': 'uint32',
'*handle': 'uint32',
'*cbitpos': 'uint32',
'reduced-phys-bits': 'uint32',
'*kernel-hashes': 'bool',
'*legacy-vm-type': 'bool' } }
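# Example (illustrative sketch; @cbitpos and @reduced-phys-bits
# depend on the host CPU, the values below are assumptions):
#
#   -object sev-guest,id=sev0,policy=0x1,cbitpos=47,reduced-phys-bits=1
#   -machine confidential-guest-support=sev0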
##
# @ThreadContextProperties:
#
# Properties for thread context objects.
#
# @cpu-affinity: the list of host CPU numbers used as CPU affinity for
# all threads created in the thread context (default: QEMU main
# thread CPU affinity)
#
# @node-affinity: the list of host node numbers that will be resolved
# to a list of host CPU numbers used as CPU affinity. This is a
# shortcut for specifying the list of host CPU numbers belonging
# to the host nodes manually by setting @cpu-affinity.
# (default: QEMU main thread affinity)
#
# Since: 7.2
##
{ 'struct': 'ThreadContextProperties',
'data': { '*cpu-affinity': ['uint16'],
'*node-affinity': ['uint16'] } }
##
# @ObjectType:
#
# Features:
#
# @unstable: Members @x-remote-object and @x-vfio-user-server are
#     experimental.
#
# Since: 6.0
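#
# Example (a sketch with an abridged return value; the
# @qom-list-types command defined elsewhere in this schema reports
# which of these types a given binary actually provides, since
# members guarded by an 'if' condition are absent when the
# corresponding configuration option is disabled):
#
# -> { "execute": "qom-list-types",
#      "arguments": { "implements": "user-creatable" } }
# <- { "return": [ { "name": "iothread" },
#                  { "name": "rng-builtin" } ] }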
##
{ 'enum': 'ObjectType',
'data': [
'acpi-generic-initiator',
'authz-list',
'authz-listfile',
'authz-pam',
'authz-simple',
'can-bus',
{ 'name': 'can-host-socketcan',
'if': 'CONFIG_LINUX' },
'colo-compare',
'cryptodev-backend',
'cryptodev-backend-builtin',
'cryptodev-backend-lkcf',
{ 'name': 'cryptodev-vhost-user',
'if': 'CONFIG_VHOST_CRYPTO' },
'dbus-vmstate',
'filter-buffer',
'filter-dump',
'filter-mirror',
'filter-redirector',
'filter-replay',
'filter-rewriter',
'input-barrier',
{ 'name': 'input-linux',
'if': 'CONFIG_LINUX' },
'iommufd',
'iothread',
'main-loop',
{ 'name': 'memory-backend-epc',
'if': 'CONFIG_LINUX' },
'memory-backend-file',
{ 'name': 'memory-backend-memfd',
'if': 'CONFIG_LINUX' },
'memory-backend-ram',
'pef-guest',
{ 'name': 'pr-manager-helper',
'if': 'CONFIG_LINUX' },
'qtest',
'rng-builtin',
'rng-egd',
{ 'name': 'rng-random',
'if': 'CONFIG_POSIX' },
'secret',
{ 'name': 'secret_keyring',
'if': 'CONFIG_SECRET_KEYRING' },
'sev-guest',
'thread-context',
's390-pv-guest',
'throttle-group',
'tls-creds-anon',
'tls-creds-psk',
'tls-creds-x509',
'tls-cipher-suites',
{ 'name': 'x-remote-object', 'features': [ 'unstable' ] },
{ 'name': 'x-vfio-user-server', 'features': [ 'unstable' ] }
] }
##
# @ObjectOptions:
#
# Describes the options of a user creatable QOM object.
#
# @qom-type: the class name for the object to be created
#
# @id: the name of the new object
#
# Since: 6.0
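#
# Example (a sketch; @ObjectOptions is a flat union, so the members
# of the branch selected by @qom-type appear alongside @qom-type and
# @id rather than nested under a separate key):
#
#     { "qom-type": "rng-random", "id": "rng1",
#       "filename": "/dev/hwrng" }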
##
{ 'union': 'ObjectOptions',
'base': { 'qom-type': 'ObjectType',
'id': 'str' },
'discriminator': 'qom-type',
'data': {
'acpi-generic-initiator': 'AcpiGenericInitiatorProperties',
'authz-list': 'AuthZListProperties',
'authz-listfile': 'AuthZListFileProperties',
'authz-pam': 'AuthZPAMProperties',
'authz-simple': 'AuthZSimpleProperties',
'can-host-socketcan': { 'type': 'CanHostSocketcanProperties',
'if': 'CONFIG_LINUX' },
'colo-compare': 'ColoCompareProperties',
'cryptodev-backend': 'CryptodevBackendProperties',
'cryptodev-backend-builtin': 'CryptodevBackendProperties',
'cryptodev-backend-lkcf': 'CryptodevBackendProperties',
'cryptodev-vhost-user': { 'type': 'CryptodevVhostUserProperties',
'if': 'CONFIG_VHOST_CRYPTO' },
'dbus-vmstate': 'DBusVMStateProperties',
'filter-buffer': 'FilterBufferProperties',
'filter-dump': 'FilterDumpProperties',
'filter-mirror': 'FilterMirrorProperties',
'filter-redirector': 'FilterRedirectorProperties',
'filter-replay': 'NetfilterProperties',
'filter-rewriter': 'FilterRewriterProperties',
'input-barrier': 'InputBarrierProperties',
'input-linux': { 'type': 'InputLinuxProperties',
'if': 'CONFIG_LINUX' },
'iommufd': 'IOMMUFDProperties',
'iothread': 'IothreadProperties',
'main-loop': 'MainLoopProperties',
'memory-backend-epc': { 'type': 'MemoryBackendEpcProperties',
'if': 'CONFIG_LINUX' },
'memory-backend-file': 'MemoryBackendFileProperties',
'memory-backend-memfd': { 'type': 'MemoryBackendMemfdProperties',
'if': 'CONFIG_LINUX' },
'memory-backend-ram': 'MemoryBackendProperties',
'pr-manager-helper': { 'type': 'PrManagerHelperProperties',
'if': 'CONFIG_LINUX' },
'qtest': 'QtestProperties',
'rng-builtin': 'RngProperties',
'rng-egd': 'RngEgdProperties',
'rng-random': { 'type': 'RngRandomProperties',
'if': 'CONFIG_POSIX' },
'secret': 'SecretProperties',
'secret_keyring': { 'type': 'SecretKeyringProperties',
'if': 'CONFIG_SECRET_KEYRING' },
'sev-guest': 'SevGuestProperties',
'thread-context': 'ThreadContextProperties',
'throttle-group': 'ThrottleGroupProperties',
'tls-creds-anon': 'TlsCredsAnonProperties',
'tls-creds-psk': 'TlsCredsPskProperties',
'tls-creds-x509': 'TlsCredsX509Properties',
'tls-cipher-suites': 'TlsCredsProperties',
'x-remote-object': 'RemoteObjectProperties',
'x-vfio-user-server': 'VfioUserServerProperties'
} }
##
# @object-add:
#
# Create a QOM object.
#
# Errors:
# - Error if @qom-type is not a valid class name
#
# Since: 2.0
#
# Example:
#
# -> { "execute": "object-add",
# "arguments": { "qom-type": "rng-random", "id": "rng1",
# "filename": "/dev/hwrng" } }
# <- { "return": {} }
##
{ 'command': 'object-add', 'data': 'ObjectOptions', 'boxed': true,
'allow-preconfig': true }
##
# @object-del:
#
# Remove a QOM object.
#
# @id: the name of the QOM object to remove
#
# Errors:
# - Error if @id is not a valid id for a QOM object
#
# Since: 2.0
#
# Example:
#
# -> { "execute": "object-del", "arguments": { "id": "rng1" } }
# <- { "return": {} }
##
{ 'command': 'object-del', 'data': {'id': 'str'},
'allow-preconfig': true }