linux/include/uapi/drm/i915_drm.h

3890 lines
128 KiB
C
Raw Normal View History

/*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef _UAPI_I915_DRM_H_
#define _UAPI_I915_DRM_H_
#include "drm.h"
#if defined(__cplusplus)
extern "C" {
#endif
/* Please note that modifications to all structs defined here are
* subject to backwards-compatibility constraints.
*/
/**
* DOC: uevents generated by i915 on its device node
*
* I915_L3_PARITY_UEVENT - Generated when the driver receives a parity mismatch
* event from the GPU L3 cache. Additional information supplied is ROW,
* BANK, SUBBANK, SLICE of the affected cacheline. Userspace should keep
* track of these events, and if a specific cache-line seems to have a
* persistent error, remap it with the L3 remapping tool supplied in
* intel-gpu-tools. The value supplied with the event is always 1.
*
* I915_ERROR_UEVENT - Generated upon error detection, currently only via
* hangcheck. The error detection event is a good indicator of when things
* began to go badly. The value supplied with the event is a 1 upon error
* detection, and a 0 upon reset completion, signifying no more error
* exists. NOTE: Disabling hangcheck or reset via module parameter will
* cause the related events to not be seen.
*
* I915_RESET_UEVENT - Event is generated just before an attempt to reset the
* GPU. The value supplied with the event is always 1. NOTE: Disable
* reset via module parameter will cause this event to not be seen.
*/
#define I915_L3_PARITY_UEVENT "L3_PARITY_ERROR"
#define I915_ERROR_UEVENT "ERROR"
#define I915_RESET_UEVENT "RESET"
/**
* struct i915_user_extension - Base class for defining a chain of extensions
drm/i915: Introduce the i915_user_extension_method An idea for extending uABI inspired by Vulkan's extension chains. Instead of expanding the data struct for each ioctl every time we need to add a new feature, define an extension chain instead. As we add optional interfaces to control the ioctl, we define a new extension struct that can be linked into the ioctl data only when required by the user. The key advantage being able to ignore large control structs for optional interfaces/extensions, while being able to process them in a consistent manner. In comparison to other extensible ioctls, the key difference is the use of a linked chain of extension structs vs an array of tagged pointers. For example, struct drm_amdgpu_cs_chunk { __u32 chunk_id; __u32 length_dw; __u64 chunk_data; }; struct drm_amdgpu_cs_in { __u32 ctx_id; __u32 bo_list_handle; __u32 num_chunks; __u32 _pad; __u64 chunks; }; allows userspace to pass in array of pointers to extension structs, but must therefore keep constructing that array along side the command stream. In dynamic situations like that, a linked list is preferred and does not similar from extra cache line misses as the extension structs themselves must still be loaded separate to the chunks array. v2: Apply the tail call optimisation directly to nip the worry of stack overflow in the bud. v3: Defend against recursion. v4: Fixup local types to match new uabi Opens: - do we include the result as an out-field in each chain? struct i915_user_extension { __u64 next_extension; __u64 name; __s32 result; __u32 mbz; /* reserved for future use */ }; * Undecided, so provision some room for future expansion. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190322092325.5883-1-chris@chris-wilson.co.uk
2019-03-22 09:23:22 +00:00
*
* Many interfaces need to grow over time. In most cases we can simply
* extend the struct and have userspace pass in more data. Another option,
* as demonstrated by Vulkan's approach to providing extensions for forward
* and backward compatibility, is to use a list of optional structs to
* provide those extra details.
*
* The key advantage to using an extension chain is that it allows us to
* redefine the interface more easily than an ever growing struct of
* increasing complexity, and for large parts of that interface to be
* entirely optional. The downside is more pointer chasing; chasing across
* the __user boundary with pointers encapsulated inside u64.
*
* Example chaining:
*
* .. code-block:: C
*
* struct i915_user_extension ext3 {
* .next_extension = 0, // end
* .name = ...,
* };
* struct i915_user_extension ext2 {
* .next_extension = (uintptr_t)&ext3,
* .name = ...,
* };
* struct i915_user_extension ext1 {
* .next_extension = (uintptr_t)&ext2,
* .name = ...,
* };
*
* Typically the struct i915_user_extension would be embedded in some uAPI
* struct, and in this case we would feed it the head of the chain(i.e ext1),
* which would then apply all of the above extensions.
*
drm/i915: Introduce the i915_user_extension_method An idea for extending uABI inspired by Vulkan's extension chains. Instead of expanding the data struct for each ioctl every time we need to add a new feature, define an extension chain instead. As we add optional interfaces to control the ioctl, we define a new extension struct that can be linked into the ioctl data only when required by the user. The key advantage being able to ignore large control structs for optional interfaces/extensions, while being able to process them in a consistent manner. In comparison to other extensible ioctls, the key difference is the use of a linked chain of extension structs vs an array of tagged pointers. For example, struct drm_amdgpu_cs_chunk { __u32 chunk_id; __u32 length_dw; __u64 chunk_data; }; struct drm_amdgpu_cs_in { __u32 ctx_id; __u32 bo_list_handle; __u32 num_chunks; __u32 _pad; __u64 chunks; }; allows userspace to pass in array of pointers to extension structs, but must therefore keep constructing that array along side the command stream. In dynamic situations like that, a linked list is preferred and does not similar from extra cache line misses as the extension structs themselves must still be loaded separate to the chunks array. v2: Apply the tail call optimisation directly to nip the worry of stack overflow in the bud. v3: Defend against recursion. v4: Fixup local types to match new uabi Opens: - do we include the result as an out-field in each chain? struct i915_user_extension { __u64 next_extension; __u64 name; __s32 result; __u32 mbz; /* reserved for future use */ }; * Undecided, so provision some room for future expansion. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190322092325.5883-1-chris@chris-wilson.co.uk
2019-03-22 09:23:22 +00:00
*/
struct i915_user_extension {
/**
* @next_extension:
*
* Pointer to the next struct i915_user_extension, or zero if the end.
*/
drm/i915: Introduce the i915_user_extension_method An idea for extending uABI inspired by Vulkan's extension chains. Instead of expanding the data struct for each ioctl every time we need to add a new feature, define an extension chain instead. As we add optional interfaces to control the ioctl, we define a new extension struct that can be linked into the ioctl data only when required by the user. The key advantage being able to ignore large control structs for optional interfaces/extensions, while being able to process them in a consistent manner. In comparison to other extensible ioctls, the key difference is the use of a linked chain of extension structs vs an array of tagged pointers. For example, struct drm_amdgpu_cs_chunk { __u32 chunk_id; __u32 length_dw; __u64 chunk_data; }; struct drm_amdgpu_cs_in { __u32 ctx_id; __u32 bo_list_handle; __u32 num_chunks; __u32 _pad; __u64 chunks; }; allows userspace to pass in array of pointers to extension structs, but must therefore keep constructing that array along side the command stream. In dynamic situations like that, a linked list is preferred and does not similar from extra cache line misses as the extension structs themselves must still be loaded separate to the chunks array. v2: Apply the tail call optimisation directly to nip the worry of stack overflow in the bud. v3: Defend against recursion. v4: Fixup local types to match new uabi Opens: - do we include the result as an out-field in each chain? struct i915_user_extension { __u64 next_extension; __u64 name; __s32 result; __u32 mbz; /* reserved for future use */ }; * Undecided, so provision some room for future expansion. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190322092325.5883-1-chris@chris-wilson.co.uk
2019-03-22 09:23:22 +00:00
__u64 next_extension;
/**
* @name: Name of the extension.
*
* Note that the name here is just some integer.
*
* Also note that the name space for this is not global for the whole
* driver, but rather its scope/meaning is limited to the specific piece
* of uAPI which has embedded the struct i915_user_extension.
*/
drm/i915: Introduce the i915_user_extension_method An idea for extending uABI inspired by Vulkan's extension chains. Instead of expanding the data struct for each ioctl every time we need to add a new feature, define an extension chain instead. As we add optional interfaces to control the ioctl, we define a new extension struct that can be linked into the ioctl data only when required by the user. The key advantage being able to ignore large control structs for optional interfaces/extensions, while being able to process them in a consistent manner. In comparison to other extensible ioctls, the key difference is the use of a linked chain of extension structs vs an array of tagged pointers. For example, struct drm_amdgpu_cs_chunk { __u32 chunk_id; __u32 length_dw; __u64 chunk_data; }; struct drm_amdgpu_cs_in { __u32 ctx_id; __u32 bo_list_handle; __u32 num_chunks; __u32 _pad; __u64 chunks; }; allows userspace to pass in array of pointers to extension structs, but must therefore keep constructing that array along side the command stream. In dynamic situations like that, a linked list is preferred and does not similar from extra cache line misses as the extension structs themselves must still be loaded separate to the chunks array. v2: Apply the tail call optimisation directly to nip the worry of stack overflow in the bud. v3: Defend against recursion. v4: Fixup local types to match new uabi Opens: - do we include the result as an out-field in each chain? struct i915_user_extension { __u64 next_extension; __u64 name; __s32 result; __u32 mbz; /* reserved for future use */ }; * Undecided, so provision some room for future expansion. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190322092325.5883-1-chris@chris-wilson.co.uk
2019-03-22 09:23:22 +00:00
__u32 name;
/**
* @flags: MBZ
*
* All undefined bits must be zero.
*/
__u32 flags;
/**
* @rsvd: MBZ
*
* Reserved for future use; must be zero.
*/
__u32 rsvd[4];
drm/i915: Introduce the i915_user_extension_method An idea for extending uABI inspired by Vulkan's extension chains. Instead of expanding the data struct for each ioctl every time we need to add a new feature, define an extension chain instead. As we add optional interfaces to control the ioctl, we define a new extension struct that can be linked into the ioctl data only when required by the user. The key advantage being able to ignore large control structs for optional interfaces/extensions, while being able to process them in a consistent manner. In comparison to other extensible ioctls, the key difference is the use of a linked chain of extension structs vs an array of tagged pointers. For example, struct drm_amdgpu_cs_chunk { __u32 chunk_id; __u32 length_dw; __u64 chunk_data; }; struct drm_amdgpu_cs_in { __u32 ctx_id; __u32 bo_list_handle; __u32 num_chunks; __u32 _pad; __u64 chunks; }; allows userspace to pass in array of pointers to extension structs, but must therefore keep constructing that array along side the command stream. In dynamic situations like that, a linked list is preferred and does not similar from extra cache line misses as the extension structs themselves must still be loaded separate to the chunks array. v2: Apply the tail call optimisation directly to nip the worry of stack overflow in the bud. v3: Defend against recursion. v4: Fixup local types to match new uabi Opens: - do we include the result as an out-field in each chain? struct i915_user_extension { __u64 next_extension; __u64 name; __s32 result; __u32 mbz; /* reserved for future use */ }; * Undecided, so provision some room for future expansion. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190322092325.5883-1-chris@chris-wilson.co.uk
2019-03-22 09:23:22 +00:00
};
/*
* MOCS indexes used for GPU surfaces, defining the cacheability of the
* surface data and the coherency for this data wrt. CPU vs. GPU accesses.
*/
enum i915_mocs_table_index {
/*
* Not cached anywhere, coherency between CPU and GPU accesses is
* guaranteed.
*/
I915_MOCS_UNCACHED,
/*
* Cacheability and coherency controlled by the kernel automatically
* based on the DRM_I915_GEM_SET_CACHING IOCTL setting and the current
* usage of the surface (used for display scanout or not).
*/
I915_MOCS_PTE,
/*
* Cached in all GPU caches available on the platform.
* Coherency between CPU and GPU accesses to the surface is not
* guaranteed without extra synchronization.
*/
I915_MOCS_CACHED,
};
/**
* enum drm_i915_gem_engine_class - uapi engine type enumeration
*
* Different engines serve different roles, and there may be more than one
* engine serving each role. This enum provides a classification of the role
* of the engine, which may be used when requesting operations to be performed
* on a certain subset of engines, or for providing information about that
* group.
*/
enum drm_i915_gem_engine_class {
/**
* @I915_ENGINE_CLASS_RENDER:
*
* Render engines support instructions used for 3D, Compute (GPGPU),
* and programmable media workloads. These instructions fetch data and
* dispatch individual work items to threads that operate in parallel.
* The threads run small programs (called "kernels" or "shaders") on
* the GPU's execution units (EUs).
*/
I915_ENGINE_CLASS_RENDER = 0,
/**
* @I915_ENGINE_CLASS_COPY:
*
* Copy engines (also referred to as "blitters") support instructions
* that move blocks of data from one location in memory to another,
* or that fill a specified location of memory with fixed data.
* Copy engines can perform pre-defined logical or bitwise operations
* on the source, destination, or pattern data.
*/
I915_ENGINE_CLASS_COPY = 1,
/**
* @I915_ENGINE_CLASS_VIDEO:
*
* Video engines (also referred to as "bit stream decode" (BSD) or
* "vdbox") support instructions that perform fixed-function media
* decode and encode.
*/
I915_ENGINE_CLASS_VIDEO = 2,
/**
* @I915_ENGINE_CLASS_VIDEO_ENHANCE:
*
* Video enhancement engines (also referred to as "vebox") support
* instructions related to image enhancement.
*/
I915_ENGINE_CLASS_VIDEO_ENHANCE = 3,
/**
* @I915_ENGINE_CLASS_COMPUTE:
*
* Compute engines support a subset of the instructions available
* on render engines: compute engines support Compute (GPGPU) and
* programmable media workloads, but do not support the 3D pipeline.
*/
I915_ENGINE_CLASS_COMPUTE = 4,
/* Values in this enum should be kept compact. */
/**
* @I915_ENGINE_CLASS_INVALID:
*
* Placeholder value to represent an invalid engine class assignment.
*/
I915_ENGINE_CLASS_INVALID = -1
};
drm/i915/uapi: Add DRM_I915_QUERY_GEOMETRY_SUBSLICES Newer platforms have DSS that aren't necessarily available for both geometry and compute, two queries will need to exist. This introduces the first, when passing a valid engine class and engine instance in the flags returns a topology describing geometry. Based on past discussion, we currently only support this new query item on Xe_HP and beyond; earlier platforms do not need to worry about geometry and compute pipelines having access to different topology and should continue to use the existing topology query. v2: fix white space errors v3: change flags from hosting 2 8 bit numbers to holding a i915_engine_class_instance struct v4: add error if non rcs engine passed. v5 (by MattR): - Improve kerneldoc and cross references to related structs/enums. (Daniel) - Clarify that geometry query is only supported on render engines (Francisco) - Clarify that the new query is only supported on Xe_HP+. - Fix checkpatch warnings. Cc: Ashutosh Dixit <ashutosh.dixit@intel.com> Cc: Matt Roper <matthew.d.roper@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Francisco Jerez <currojerez@riseup.net> UMD (mesa): https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14143 Testcase: igt@i915_query@test-query-geometry-subslices Signed-off-by: Matt Atwood <matthew.s.atwood@intel.com> Signed-off-by: Matt Roper <matthew.d.roper@intel.com> Reviewed-by: Francisco Jerez <currojerez@riseup.net> Link: https://patchwork.freedesktop.org/patch/msgid/20220414192230.749771-4-matthew.d.roper@intel.com
2022-04-14 19:22:30 +00:00
/**
* struct i915_engine_class_instance - Engine class/instance identifier
*
* There may be more than one engine fulfilling any role within the system.
* Each engine of a class is given a unique instance number and therefore
* any engine can be specified by its class:instance tuplet. APIs that allow
* access to any engine in the system will use struct i915_engine_class_instance
* for this identification.
*/
struct i915_engine_class_instance {
drm/i915/uapi: Add DRM_I915_QUERY_GEOMETRY_SUBSLICES Newer platforms have DSS that aren't necessarily available for both geometry and compute, two queries will need to exist. This introduces the first, when passing a valid engine class and engine instance in the flags returns a topology describing geometry. Based on past discussion, we currently only support this new query item on Xe_HP and beyond; earlier platforms do not need to worry about geometry and compute pipelines having access to different topology and should continue to use the existing topology query. v2: fix white space errors v3: change flags from hosting 2 8 bit numbers to holding a i915_engine_class_instance struct v4: add error if non rcs engine passed. v5 (by MattR): - Improve kerneldoc and cross references to related structs/enums. (Daniel) - Clarify that geometry query is only supported on render engines (Francisco) - Clarify that the new query is only supported on Xe_HP+. - Fix checkpatch warnings. Cc: Ashutosh Dixit <ashutosh.dixit@intel.com> Cc: Matt Roper <matthew.d.roper@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Francisco Jerez <currojerez@riseup.net> UMD (mesa): https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14143 Testcase: igt@i915_query@test-query-geometry-subslices Signed-off-by: Matt Atwood <matthew.s.atwood@intel.com> Signed-off-by: Matt Roper <matthew.d.roper@intel.com> Reviewed-by: Francisco Jerez <currojerez@riseup.net> Link: https://patchwork.freedesktop.org/patch/msgid/20220414192230.749771-4-matthew.d.roper@intel.com
2022-04-14 19:22:30 +00:00
/**
* @engine_class:
*
* Engine class from enum drm_i915_gem_engine_class
*/
__u16 engine_class;
drm/i915: Allow a context to define its set of engines Over the last few years, we have debated how to extend the user API to support an increase in the number of engines, that may be sparse and even be heterogeneous within a class (not all video decoders created equal). We settled on using (class, instance) tuples to identify a specific engine, with an API for the user to construct a map of engines to capabilities. Into this picture, we then add a challenge of virtual engines; one user engine that maps behind the scenes to any number of physical engines. To keep it general, we want the user to have full control over that mapping. To that end, we allow the user to constrain a context to define the set of engines that it can access, order fully controlled by the user via (class, instance). With such precise control in context setup, we can continue to use the existing execbuf uABI of specifying a single index; only now it doesn't automagically map onto the engines, it uses the user defined engine map from the context. v2: Fixup freeing of local on success of get_engines() v3: Allow empty engines[] v4: s/nengine/num_engines/ v5: Replace 64 limit on num_engines with a note that execbuf is currently limited to only using the first 64 engines. v6: Actually use the engines_mutex to guard the ctx->engines. Testcase: igt/gem_ctx_engines Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190521211134.16117-2-chris@chris-wilson.co.uk
2019-05-21 21:11:26 +00:00
#define I915_ENGINE_CLASS_INVALID_NONE -1
drm/i915: Load balancing across a virtual engine Having allowed the user to define a set of engines that they will want to only use, we go one step further and allow them to bind those engines into a single virtual instance. Submitting a batch to the virtual engine will then forward it to any one of the set in a manner as best to distribute load. The virtual engine has a single timeline across all engines (it operates as a single queue), so it is not able to concurrently run batches across multiple engines by itself; that is left up to the user to submit multiple concurrent batches to multiple queues. Multiple users will be load balanced across the system. The mechanism used for load balancing in this patch is a late greedy balancer. When a request is ready for execution, it is added to each engine's queue, and when an engine is ready for its next request it claims it from the virtual engine. The first engine to do so, wins, i.e. the request is executed at the earliest opportunity (idle moment) in the system. As not all HW is created equal, the user is still able to skip the virtual engine and execute the batch on a specific engine, all within the same queue. It will then be executed in order on the correct engine, with execution on other virtual engines being moved away due to the load detection. A couple of areas for potential improvement left! - The virtual engine always take priority over equal-priority tasks. Mostly broken up by applying FQ_CODEL rules for prioritising new clients, and hopefully the virtual and real engines are not then congested (i.e. all work is via virtual engines, or all work is to the real engine). - We require the breadcrumb irq around every virtual engine request. For normal engines, we eliminate the need for the slow round trip via interrupt by using the submit fence and queueing in order. For virtual engines, we have to allow any job to transfer to a new ring, and cannot coalesce the submissions, so require the completion fence instead, forcing the persistent use of interrupts. - We only drip feed single requests through each virtual engine and onto the physical engines, even if there was enough work to fill all ELSP, leaving small stalls with an idle CS event at the end of every request. Could we be greedy and fill both slots? Being lazy is virtuous for load distribution on less-than-full workloads though. Other areas of improvement are more general, such as reducing lock contention, reducing dispatch overhead, looking at direct submission rather than bouncing around tasklets etc. sseu: Lift the restriction to allow sseu to be reconfigured on virtual engines composed of RENDER_CLASS (rcs). v2: macroize check_user_mbz() v3: Cancel virtual engines on wedging v4: Commence commenting v5: Replace 64b sibling_mask with a list of class:instance v6: Drop the one-element array in the uabi v7: Assert it is an virtual engine in to_virtual_engine() v8: Skip over holes in [class][inst] so we can selftest with (vcs0, vcs2) Link: https://github.com/intel/media-driver/pull/283 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190521211134.16117-6-chris@chris-wilson.co.uk
2019-05-21 21:11:30 +00:00
#define I915_ENGINE_CLASS_INVALID_VIRTUAL -2
drm/i915/uapi: Add DRM_I915_QUERY_GEOMETRY_SUBSLICES Newer platforms have DSS that aren't necessarily available for both geometry and compute, two queries will need to exist. This introduces the first, when passing a valid engine class and engine instance in the flags returns a topology describing geometry. Based on past discussion, we currently only support this new query item on Xe_HP and beyond; earlier platforms do not need to worry about geometry and compute pipelines having access to different topology and should continue to use the existing topology query. v2: fix white space errors v3: change flags from hosting 2 8 bit numbers to holding a i915_engine_class_instance struct v4: add error if non rcs engine passed. v5 (by MattR): - Improve kerneldoc and cross references to related structs/enums. (Daniel) - Clarify that geometry query is only supported on render engines (Francisco) - Clarify that the new query is only supported on Xe_HP+. - Fix checkpatch warnings. Cc: Ashutosh Dixit <ashutosh.dixit@intel.com> Cc: Matt Roper <matthew.d.roper@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Francisco Jerez <currojerez@riseup.net> UMD (mesa): https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14143 Testcase: igt@i915_query@test-query-geometry-subslices Signed-off-by: Matt Atwood <matthew.s.atwood@intel.com> Signed-off-by: Matt Roper <matthew.d.roper@intel.com> Reviewed-by: Francisco Jerez <currojerez@riseup.net> Link: https://patchwork.freedesktop.org/patch/msgid/20220414192230.749771-4-matthew.d.roper@intel.com
2022-04-14 19:22:30 +00:00
/**
* @engine_instance:
*
* Engine instance.
*/
__u16 engine_instance;
};
drm/i915/pmu: Expose a PMU interface for perf queries From: Chris Wilson <chris@chris-wilson.co.uk> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> From: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com> The first goal is to be able to measure GPU (and invidual ring) busyness without having to poll registers from userspace. (Which not only incurs holding the forcewake lock indefinitely, perturbing the system, but also runs the risk of hanging the machine.) As an alternative we can use the perf event counter interface to sample the ring registers periodically and send those results to userspace. Functionality we are exporting to userspace is via the existing perf PMU API and can be exercised via the existing tools. For example: perf stat -a -e i915/rcs0-busy/ -I 1000 Will print the render engine busynnes once per second. All the performance counters can be enumerated (perf list) and have their unit of measure correctly reported in sysfs. v1-v2 (Chris Wilson): v2: Use a common timer for the ring sampling. v3: (Tvrtko Ursulin) * Decouple uAPI from i915 engine ids. * Complete uAPI defines. * Refactor some code to helpers for clarity. * Skip sampling disabled engines. * Expose counters in sysfs. * Pass in fake regs to avoid null ptr deref in perf core. * Convert to class/instance uAPI. * Use shared driver code for rc6 residency, power and frequency. v4: (Dmitry Rogozhkin) * Register PMU with .task_ctx_nr=perf_invalid_context * Expose cpumask for the PMU with the single CPU in the mask * Properly support pmu->stop(): it should call pmu->read() * Properly support pmu->del(): it should call stop(event, PERF_EF_UPDATE) * Introduce refcounting of event subscriptions. * Make pmu.busy_stats a refcounter to avoid busy stats going away with some deleted event. * Expose cpumask for i915 PMU to avoid multiple events creation of the same type followed by counter aggregation by perf-stat. * Track CPUs getting online/offline to migrate perf context. If (likely) cpumask will initially set CPU0, CONFIG_BOOTPARAM_HOTPLUG_CPU0 will be needed to see effect of CPU status tracking. * End result is that only global events are supported and perf stat works correctly. * Deny perf driver level sampling - it is prohibited for uncore PMU. v5: (Tvrtko Ursulin) * Don't hardcode number of engine samplers. * Rewrite event ref-counting for correctness and simplicity. * Store initial counter value when starting already enabled events to correctly report values to all listeners. * Fix RC6 residency readout. * Comments, GPL header. v6: * Add missing entry to v4 changelog. * Fix accounting in CPU hotplug case by copying the approach from arch/x86/events/intel/cstate.c. (Dmitry Rogozhkin) v7: * Log failure message only on failure. * Remove CPU hotplug notification state on unregister. v8: * Fix error unwind on failed registration. * Checkpatch cleanup. v9: * Drop the energy metric, it is available via intel_rapl_perf. (Ville Syrjälä) * Use HAS_RC6(p). (Chris Wilson) * Handle unsupported non-engine events. (Dmitry Rogozhkin) * Rebase for intel_rc6_residency_ns needing caller managed runtime pm. * Drop HAS_RC6 checks from the read callback since creating those events will be rejected at init time already. * Add counter units to sysfs so perf stat output is nicer. * Cleanup the attribute tables for brevity and readability. v10: * Fixed queued accounting. v11: * Move intel_engine_lookup_user to intel_engine_cs.c * Commit update. (Joonas Lahtinen) v12: * More accurate sampling. (Chris Wilson) * Store and report frequency in MHz for better usability from perf stat. * Removed metrics: queued, interrupts, rc6 counters. * Sample engine busyness based on seqno difference only for less MMIO (and forcewake) on all platforms. (Chris Wilson) v13: * Comment spelling, use mul_u32_u32 to work around potential GCC issue and somne code alignment changes. (Chris Wilson) v14: * Rebase. v15: * Rebase for RPS refactoring. v16: * Use the dynamic slot in the CPU hotplug state machine so that we are free to setup our state as multi-instance. Previously we were re-using the CPUHP_AP_PERF_X86_UNCORE_ONLINE slot which is neither used as multi-instance, nor owned by our driver to start with. * Register the CPU hotplug handlers after the PMU, otherwise the callback will get called before the PMU is initialized which can end up in perf_pmu_migrate_context with an un-initialized base. * Added workaround for a probable bug in cpuhp core. v17: * Remove workaround for the cpuhp bug. v18: * Rebase for drm_i915_gem_engine_class getting upstream before us. v19: * Rebase. (trivial) Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20171121181852.16128-2-tvrtko.ursulin@linux.intel.com
2017-11-21 18:18:45 +00:00
/**
* DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915
*
*/
enum drm_i915_pmu_engine_sample {
I915_SAMPLE_BUSY = 0,
I915_SAMPLE_WAIT = 1,
I915_SAMPLE_SEMA = 2
drm/i915/pmu: Expose a PMU interface for perf queries From: Chris Wilson <chris@chris-wilson.co.uk> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> From: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com> The first goal is to be able to measure GPU (and invidual ring) busyness without having to poll registers from userspace. (Which not only incurs holding the forcewake lock indefinitely, perturbing the system, but also runs the risk of hanging the machine.) As an alternative we can use the perf event counter interface to sample the ring registers periodically and send those results to userspace. Functionality we are exporting to userspace is via the existing perf PMU API and can be exercised via the existing tools. For example: perf stat -a -e i915/rcs0-busy/ -I 1000 Will print the render engine busynnes once per second. All the performance counters can be enumerated (perf list) and have their unit of measure correctly reported in sysfs. v1-v2 (Chris Wilson): v2: Use a common timer for the ring sampling. v3: (Tvrtko Ursulin) * Decouple uAPI from i915 engine ids. * Complete uAPI defines. * Refactor some code to helpers for clarity. * Skip sampling disabled engines. * Expose counters in sysfs. * Pass in fake regs to avoid null ptr deref in perf core. * Convert to class/instance uAPI. * Use shared driver code for rc6 residency, power and frequency. v4: (Dmitry Rogozhkin) * Register PMU with .task_ctx_nr=perf_invalid_context * Expose cpumask for the PMU with the single CPU in the mask * Properly support pmu->stop(): it should call pmu->read() * Properly support pmu->del(): it should call stop(event, PERF_EF_UPDATE) * Introduce refcounting of event subscriptions. * Make pmu.busy_stats a refcounter to avoid busy stats going away with some deleted event. * Expose cpumask for i915 PMU to avoid multiple events creation of the same type followed by counter aggregation by perf-stat. * Track CPUs getting online/offline to migrate perf context. If (likely) cpumask will initially set CPU0, CONFIG_BOOTPARAM_HOTPLUG_CPU0 will be needed to see effect of CPU status tracking. * End result is that only global events are supported and perf stat works correctly. * Deny perf driver level sampling - it is prohibited for uncore PMU. v5: (Tvrtko Ursulin) * Don't hardcode number of engine samplers. * Rewrite event ref-counting for correctness and simplicity. * Store initial counter value when starting already enabled events to correctly report values to all listeners. * Fix RC6 residency readout. * Comments, GPL header. v6: * Add missing entry to v4 changelog. * Fix accounting in CPU hotplug case by copying the approach from arch/x86/events/intel/cstate.c. (Dmitry Rogozhkin) v7: * Log failure message only on failure. * Remove CPU hotplug notification state on unregister. v8: * Fix error unwind on failed registration. * Checkpatch cleanup. v9: * Drop the energy metric, it is available via intel_rapl_perf. (Ville Syrjälä) * Use HAS_RC6(p). (Chris Wilson) * Handle unsupported non-engine events. (Dmitry Rogozhkin) * Rebase for intel_rc6_residency_ns needing caller managed runtime pm. * Drop HAS_RC6 checks from the read callback since creating those events will be rejected at init time already. * Add counter units to sysfs so perf stat output is nicer. * Cleanup the attribute tables for brevity and readability. v10: * Fixed queued accounting. v11: * Move intel_engine_lookup_user to intel_engine_cs.c * Commit update. (Joonas Lahtinen) v12: * More accurate sampling. (Chris Wilson) * Store and report frequency in MHz for better usability from perf stat. * Removed metrics: queued, interrupts, rc6 counters. * Sample engine busyness based on seqno difference only for less MMIO (and forcewake) on all platforms. (Chris Wilson) v13: * Comment spelling, use mul_u32_u32 to work around potential GCC issue and somne code alignment changes. (Chris Wilson) v14: * Rebase. v15: * Rebase for RPS refactoring. v16: * Use the dynamic slot in the CPU hotplug state machine so that we are free to setup our state as multi-instance. Previously we were re-using the CPUHP_AP_PERF_X86_UNCORE_ONLINE slot which is neither used as multi-instance, nor owned by our driver to start with. * Register the CPU hotplug handlers after the PMU, otherwise the callback will get called before the PMU is initialized which can end up in perf_pmu_migrate_context with an un-initialized base. * Added workaround for a probable bug in cpuhp core. v17: * Remove workaround for the cpuhp bug. v18: * Rebase for drm_i915_gem_engine_class getting upstream before us. v19: * Rebase. (trivial) Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20171121181852.16128-2-tvrtko.ursulin@linux.intel.com
2017-11-21 18:18:45 +00:00
};
#define I915_PMU_SAMPLE_BITS (4)
#define I915_PMU_SAMPLE_MASK (0xf)
#define I915_PMU_SAMPLE_INSTANCE_BITS (8)
#define I915_PMU_CLASS_SHIFT \
(I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS)
#define __I915_PMU_ENGINE(class, instance, sample) \
((class) << I915_PMU_CLASS_SHIFT | \
(instance) << I915_PMU_SAMPLE_BITS | \
(sample))
#define I915_PMU_ENGINE_BUSY(class, instance) \
__I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY)
#define I915_PMU_ENGINE_WAIT(class, instance) \
__I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT)
#define I915_PMU_ENGINE_SEMA(class, instance) \
__I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
/*
* Top 4 bits of every non-engine counter are GT id.
*/
#define __I915_PMU_GT_SHIFT (60)
#define ___I915_PMU_OTHER(gt, x) \
(((__u64)__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) | \
((__u64)(gt) << __I915_PMU_GT_SHIFT))
#define __I915_PMU_OTHER(x) ___I915_PMU_OTHER(0, x)
drm/i915/pmu: Expose a PMU interface for perf queries From: Chris Wilson <chris@chris-wilson.co.uk> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> From: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com> The first goal is to be able to measure GPU (and invidual ring) busyness without having to poll registers from userspace. (Which not only incurs holding the forcewake lock indefinitely, perturbing the system, but also runs the risk of hanging the machine.) As an alternative we can use the perf event counter interface to sample the ring registers periodically and send those results to userspace. Functionality we are exporting to userspace is via the existing perf PMU API and can be exercised via the existing tools. For example: perf stat -a -e i915/rcs0-busy/ -I 1000 Will print the render engine busynnes once per second. All the performance counters can be enumerated (perf list) and have their unit of measure correctly reported in sysfs. v1-v2 (Chris Wilson): v2: Use a common timer for the ring sampling. v3: (Tvrtko Ursulin) * Decouple uAPI from i915 engine ids. * Complete uAPI defines. * Refactor some code to helpers for clarity. * Skip sampling disabled engines. * Expose counters in sysfs. * Pass in fake regs to avoid null ptr deref in perf core. * Convert to class/instance uAPI. * Use shared driver code for rc6 residency, power and frequency. v4: (Dmitry Rogozhkin) * Register PMU with .task_ctx_nr=perf_invalid_context * Expose cpumask for the PMU with the single CPU in the mask * Properly support pmu->stop(): it should call pmu->read() * Properly support pmu->del(): it should call stop(event, PERF_EF_UPDATE) * Introduce refcounting of event subscriptions. * Make pmu.busy_stats a refcounter to avoid busy stats going away with some deleted event. * Expose cpumask for i915 PMU to avoid multiple events creation of the same type followed by counter aggregation by perf-stat. * Track CPUs getting online/offline to migrate perf context. If (likely) cpumask will initially set CPU0, CONFIG_BOOTPARAM_HOTPLUG_CPU0 will be needed to see effect of CPU status tracking. * End result is that only global events are supported and perf stat works correctly. * Deny perf driver level sampling - it is prohibited for uncore PMU. v5: (Tvrtko Ursulin) * Don't hardcode number of engine samplers. * Rewrite event ref-counting for correctness and simplicity. * Store initial counter value when starting already enabled events to correctly report values to all listeners. * Fix RC6 residency readout. * Comments, GPL header. v6: * Add missing entry to v4 changelog. * Fix accounting in CPU hotplug case by copying the approach from arch/x86/events/intel/cstate.c. (Dmitry Rogozhkin) v7: * Log failure message only on failure. * Remove CPU hotplug notification state on unregister. v8: * Fix error unwind on failed registration. * Checkpatch cleanup. v9: * Drop the energy metric, it is available via intel_rapl_perf. (Ville Syrjälä) * Use HAS_RC6(p). (Chris Wilson) * Handle unsupported non-engine events. (Dmitry Rogozhkin) * Rebase for intel_rc6_residency_ns needing caller managed runtime pm. * Drop HAS_RC6 checks from the read callback since creating those events will be rejected at init time already. * Add counter units to sysfs so perf stat output is nicer. * Cleanup the attribute tables for brevity and readability. v10: * Fixed queued accounting. v11: * Move intel_engine_lookup_user to intel_engine_cs.c * Commit update. (Joonas Lahtinen) v12: * More accurate sampling. (Chris Wilson) * Store and report frequency in MHz for better usability from perf stat. * Removed metrics: queued, interrupts, rc6 counters. * Sample engine busyness based on seqno difference only for less MMIO (and forcewake) on all platforms. (Chris Wilson) v13: * Comment spelling, use mul_u32_u32 to work around potential GCC issue and somne code alignment changes. (Chris Wilson) v14: * Rebase. v15: * Rebase for RPS refactoring. v16: * Use the dynamic slot in the CPU hotplug state machine so that we are free to setup our state as multi-instance. Previously we were re-using the CPUHP_AP_PERF_X86_UNCORE_ONLINE slot which is neither used as multi-instance, nor owned by our driver to start with. * Register the CPU hotplug handlers after the PMU, otherwise the callback will get called before the PMU is initialized which can end up in perf_pmu_migrate_context with an un-initialized base. * Added workaround for a probable bug in cpuhp core. v17: * Remove workaround for the cpuhp bug. v18: * Rebase for drm_i915_gem_engine_class getting upstream before us. v19: * Rebase. (trivial) Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20171121181852.16128-2-tvrtko.ursulin@linux.intel.com
2017-11-21 18:18:45 +00:00
#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0)
#define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1)
#define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2)
#define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3)
#define I915_PMU_SOFTWARE_GT_AWAKE_TIME __I915_PMU_OTHER(4)
#define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY
drm/i915/pmu: Expose a PMU interface for perf queries From: Chris Wilson <chris@chris-wilson.co.uk> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> From: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com> The first goal is to be able to measure GPU (and invidual ring) busyness without having to poll registers from userspace. (Which not only incurs holding the forcewake lock indefinitely, perturbing the system, but also runs the risk of hanging the machine.) As an alternative we can use the perf event counter interface to sample the ring registers periodically and send those results to userspace. Functionality we are exporting to userspace is via the existing perf PMU API and can be exercised via the existing tools. For example: perf stat -a -e i915/rcs0-busy/ -I 1000 Will print the render engine busynnes once per second. All the performance counters can be enumerated (perf list) and have their unit of measure correctly reported in sysfs. v1-v2 (Chris Wilson): v2: Use a common timer for the ring sampling. v3: (Tvrtko Ursulin) * Decouple uAPI from i915 engine ids. * Complete uAPI defines. * Refactor some code to helpers for clarity. * Skip sampling disabled engines. * Expose counters in sysfs. * Pass in fake regs to avoid null ptr deref in perf core. * Convert to class/instance uAPI. * Use shared driver code for rc6 residency, power and frequency. v4: (Dmitry Rogozhkin) * Register PMU with .task_ctx_nr=perf_invalid_context * Expose cpumask for the PMU with the single CPU in the mask * Properly support pmu->stop(): it should call pmu->read() * Properly support pmu->del(): it should call stop(event, PERF_EF_UPDATE) * Introduce refcounting of event subscriptions. * Make pmu.busy_stats a refcounter to avoid busy stats going away with some deleted event. * Expose cpumask for i915 PMU to avoid multiple events creation of the same type followed by counter aggregation by perf-stat. * Track CPUs getting online/offline to migrate perf context. If (likely) cpumask will initially set CPU0, CONFIG_BOOTPARAM_HOTPLUG_CPU0 will be needed to see effect of CPU status tracking. * End result is that only global events are supported and perf stat works correctly. * Deny perf driver level sampling - it is prohibited for uncore PMU. v5: (Tvrtko Ursulin) * Don't hardcode number of engine samplers. * Rewrite event ref-counting for correctness and simplicity. * Store initial counter value when starting already enabled events to correctly report values to all listeners. * Fix RC6 residency readout. * Comments, GPL header. v6: * Add missing entry to v4 changelog. * Fix accounting in CPU hotplug case by copying the approach from arch/x86/events/intel/cstate.c. (Dmitry Rogozhkin) v7: * Log failure message only on failure. * Remove CPU hotplug notification state on unregister. v8: * Fix error unwind on failed registration. * Checkpatch cleanup. v9: * Drop the energy metric, it is available via intel_rapl_perf. (Ville Syrjälä) * Use HAS_RC6(p). (Chris Wilson) * Handle unsupported non-engine events. (Dmitry Rogozhkin) * Rebase for intel_rc6_residency_ns needing caller managed runtime pm. * Drop HAS_RC6 checks from the read callback since creating those events will be rejected at init time already. * Add counter units to sysfs so perf stat output is nicer. * Cleanup the attribute tables for brevity and readability. v10: * Fixed queued accounting. v11: * Move intel_engine_lookup_user to intel_engine_cs.c * Commit update. (Joonas Lahtinen) v12: * More accurate sampling. (Chris Wilson) * Store and report frequency in MHz for better usability from perf stat. * Removed metrics: queued, interrupts, rc6 counters. * Sample engine busyness based on seqno difference only for less MMIO (and forcewake) on all platforms. (Chris Wilson) v13: * Comment spelling, use mul_u32_u32 to work around potential GCC issue and somne code alignment changes. (Chris Wilson) v14: * Rebase. v15: * Rebase for RPS refactoring. v16: * Use the dynamic slot in the CPU hotplug state machine so that we are free to setup our state as multi-instance. Previously we were re-using the CPUHP_AP_PERF_X86_UNCORE_ONLINE slot which is neither used as multi-instance, nor owned by our driver to start with. * Register the CPU hotplug handlers after the PMU, otherwise the callback will get called before the PMU is initialized which can end up in perf_pmu_migrate_context with an un-initialized base. * Added workaround for a probable bug in cpuhp core. v17: * Remove workaround for the cpuhp bug. v18: * Rebase for drm_i915_gem_engine_class getting upstream before us. v19: * Rebase. (trivial) Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20171121181852.16128-2-tvrtko.ursulin@linux.intel.com
2017-11-21 18:18:45 +00:00
#define __I915_PMU_ACTUAL_FREQUENCY(gt) ___I915_PMU_OTHER(gt, 0)
#define __I915_PMU_REQUESTED_FREQUENCY(gt) ___I915_PMU_OTHER(gt, 1)
#define __I915_PMU_INTERRUPTS(gt) ___I915_PMU_OTHER(gt, 2)
#define __I915_PMU_RC6_RESIDENCY(gt) ___I915_PMU_OTHER(gt, 3)
#define __I915_PMU_SOFTWARE_GT_AWAKE_TIME(gt) ___I915_PMU_OTHER(gt, 4)
/* Each region is a minimum of 16k, and there are at most 255 of them.
*/
#define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use
* of chars for next/prev indices */
#define I915_LOG_MIN_TEX_REGION_SIZE 14
typedef struct _drm_i915_init {
enum {
I915_INIT_DMA = 0x01,
I915_CLEANUP_DMA = 0x02,
I915_RESUME_DMA = 0x03
} func;
unsigned int mmio_offset;
int sarea_priv_offset;
unsigned int ring_start;
unsigned int ring_end;
unsigned int ring_size;
unsigned int front_offset;
unsigned int back_offset;
unsigned int depth_offset;
unsigned int w;
unsigned int h;
unsigned int pitch;
unsigned int pitch_bits;
unsigned int back_pitch;
unsigned int depth_pitch;
unsigned int cpp;
unsigned int chipset;
} drm_i915_init_t;
typedef struct _drm_i915_sarea {
struct drm_tex_region texList[I915_NR_TEX_REGIONS + 1];
int last_upload; /* last time texture was uploaded */
int last_enqueue; /* last time a buffer was enqueued */
int last_dispatch; /* age of the most recently dispatched buffer */
int ctxOwner; /* last context to upload state */
int texAge;
int pf_enabled; /* is pageflipping allowed? */
int pf_active;
int pf_current_page; /* which buffer is being displayed? */
int perf_boxes; /* performance boxes to be displayed */
int width, height; /* screen size in pixels */
drm_handle_t front_handle;
int front_offset;
int front_size;
drm_handle_t back_handle;
int back_offset;
int back_size;
drm_handle_t depth_handle;
int depth_offset;
int depth_size;
drm_handle_t tex_handle;
int tex_offset;
int tex_size;
int log_tex_granularity;
int pitch;
int rotation; /* 0, 90, 180 or 270 */
int rotated_offset;
int rotated_size;
int rotated_pitch;
int virtualX, virtualY;
unsigned int front_tiled;
unsigned int back_tiled;
unsigned int depth_tiled;
unsigned int rotated_tiled;
unsigned int rotated2_tiled;
int pipeA_x;
int pipeA_y;
int pipeA_w;
int pipeA_h;
int pipeB_x;
int pipeB_y;
int pipeB_w;
int pipeB_h;
/* fill out some space for old userspace triple buffer */
drm_handle_t unused_handle;
__u32 unused1, unused2, unused3;
/* buffer object handles for static buffers. May change
* over the lifetime of the client.
*/
__u32 front_bo_handle;
__u32 back_bo_handle;
__u32 unused_bo_handle;
__u32 depth_bo_handle;
} drm_i915_sarea_t;
/* due to userspace building against these headers we need some compat here */
#define planeA_x pipeA_x
#define planeA_y pipeA_y
#define planeA_w pipeA_w
#define planeA_h pipeA_h
#define planeB_x pipeB_x
#define planeB_y pipeB_y
#define planeB_w pipeB_w
#define planeB_h pipeB_h
/* Flags for perf_boxes
*/
#define I915_BOX_RING_EMPTY 0x1
#define I915_BOX_FLIP 0x2
#define I915_BOX_WAIT 0x4
#define I915_BOX_TEXTURE_LOAD 0x8
#define I915_BOX_LOST_CONTEXT 0x10
/*
* i915 specific ioctls.
*
* The device specific ioctl range is [DRM_COMMAND_BASE, DRM_COMMAND_END) ie
* [0x40, 0xa0) (a0 is excluded). The numbers below are defined as offset
* against DRM_COMMAND_BASE and should be between [0x0, 0x60).
*/
#define DRM_I915_INIT 0x00
#define DRM_I915_FLUSH 0x01
#define DRM_I915_FLIP 0x02
#define DRM_I915_BATCHBUFFER 0x03
#define DRM_I915_IRQ_EMIT 0x04
#define DRM_I915_IRQ_WAIT 0x05
#define DRM_I915_GETPARAM 0x06
#define DRM_I915_SETPARAM 0x07
#define DRM_I915_ALLOC 0x08
#define DRM_I915_FREE 0x09
#define DRM_I915_INIT_HEAP 0x0a
#define DRM_I915_CMDBUFFER 0x0b
#define DRM_I915_DESTROY_HEAP 0x0c
#define DRM_I915_SET_VBLANK_PIPE 0x0d
#define DRM_I915_GET_VBLANK_PIPE 0x0e
#define DRM_I915_VBLANK_SWAP 0x0f
#define DRM_I915_HWS_ADDR 0x11
#define DRM_I915_GEM_INIT 0x13
#define DRM_I915_GEM_EXECBUFFER 0x14
#define DRM_I915_GEM_PIN 0x15
#define DRM_I915_GEM_UNPIN 0x16
#define DRM_I915_GEM_BUSY 0x17
#define DRM_I915_GEM_THROTTLE 0x18
#define DRM_I915_GEM_ENTERVT 0x19
#define DRM_I915_GEM_LEAVEVT 0x1a
#define DRM_I915_GEM_CREATE 0x1b
#define DRM_I915_GEM_PREAD 0x1c
#define DRM_I915_GEM_PWRITE 0x1d
#define DRM_I915_GEM_MMAP 0x1e
#define DRM_I915_GEM_SET_DOMAIN 0x1f
#define DRM_I915_GEM_SW_FINISH 0x20
#define DRM_I915_GEM_SET_TILING 0x21
#define DRM_I915_GEM_GET_TILING 0x22
#define DRM_I915_GEM_GET_APERTURE 0x23
#define DRM_I915_GEM_MMAP_GTT 0x24
#define DRM_I915_GET_PIPE_FROM_CRTC_ID 0x25
#define DRM_I915_GEM_MADVISE 0x26
#define DRM_I915_OVERLAY_PUT_IMAGE 0x27
#define DRM_I915_OVERLAY_ATTRS 0x28
#define DRM_I915_GEM_EXECBUFFER2 0x29
#define DRM_I915_GEM_EXECBUFFER2_WR DRM_I915_GEM_EXECBUFFER2
#define DRM_I915_GET_SPRITE_COLORKEY 0x2a
#define DRM_I915_SET_SPRITE_COLORKEY 0x2b
#define DRM_I915_GEM_WAIT 0x2c
#define DRM_I915_GEM_CONTEXT_CREATE 0x2d
#define DRM_I915_GEM_CONTEXT_DESTROY 0x2e
#define DRM_I915_GEM_SET_CACHING 0x2f
#define DRM_I915_GEM_GET_CACHING 0x30
#define DRM_I915_REG_READ 0x31
#define DRM_I915_GET_RESET_STATS 0x32
drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl By exporting the ability to map user address and inserting PTEs representing their backing pages into the GTT, we can exploit UMA in order to utilize normal application data as a texture source or even as a render target (depending upon the capabilities of the chipset). This has a number of uses, with zero-copy downloads to the GPU and efficient readback making the intermixed streaming of CPU and GPU operations fairly efficient. This ability has many widespread implications from faster rendering of client-side software rasterisers (chromium), mitigation of stalls due to read back (firefox) and to faster pipelining of texture data (such as pixel buffer objects in GL or data blobs in CL). v2: Compile with CONFIG_MMU_NOTIFIER v3: We can sleep while performing invalidate-range, which we can utilise to drop our page references prior to the kernel manipulating the vma (for either discard or cloning) and so protect normal users. v4: Only run the invalidate notifier if the range intercepts the bo. v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers v6: Recheck after reacquire mutex for lost mmu. v7: Fix implicit padding of ioctl struct by rounding to next 64bit boundary. v8: Fix rebasing error after forwarding porting the back port. v9: Limit the userptr to page aligned entries. We now expect userspace to handle all the offset-in-page adjustments itself. v10: Prevent vma from being copied across fork to avoid issues with cow. v11: Drop vma behaviour changes -- locking is nigh on impossible. Use a worker to load user pages to avoid lock inversions. v12: Use get_task_mm()/mmput() for correct refcounting of mm. v13: Use a worker to release the mmu_notifier to avoid lock inversion v14: Decouple mmu_notifier from struct_mutex using a custom mmu_notifer with its own locking and tree of objects for each mm/mmu_notifier. v15: Prevent overlapping userptr objects, and invalidate all objects within the mmu_notifier range v16: Fix a typo for iterating over multiple objects in the range and rearrange error path to destroy the mmu_notifier locklessly. Also close a race between invalidate_range and the get_pages_worker. v17: Close a race between get_pages_worker/invalidate_range and fresh allocations of the same userptr range - and notice that struct_mutex was presumed to be held when during creation it wasn't. v18: Sigh. Fix the refactor of st_set_pages() to allocate enough memory for the struct sg_table and to clear it before reporting an error. v19: Always error out on read-only userptr requests as we don't have the hardware infrastructure to support them at the moment. v20: Refuse to implement read-only support until we have the required infrastructure - but reserve the bit in flags for future use. v21: use_mm() is not required for get_user_pages(). It is only meant to be used to fix up the kernel thread's current->mm for use with copy_user(). v22: Use sg_alloc_table_from_pages for that chunky feeling v23: Export a function for sanity checking dma-buf rather than encode userptr details elsewhere, and clean up comments based on suggestions by Bradley. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com> Cc: Akash Goel <akash.goel@intel.com> Cc: "Volkin, Bradley D" <bradley.d.volkin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Reviewed-by: Brad Volkin <bradley.d.volkin@intel.com> [danvet: Frob ioctl allocation to pick the next one - will cause a bit of fuss with create2 apparently, but such are the rules.] [danvet2: oops, forgot to git add after manual patch application] [danvet3: Appease sparse.] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-05-16 13:22:37 +00:00
#define DRM_I915_GEM_USERPTR 0x33
#define DRM_I915_GEM_CONTEXT_GETPARAM 0x34
#define DRM_I915_GEM_CONTEXT_SETPARAM 0x35
drm/i915: Add i915 perf infrastructure Adds base i915 perf infrastructure for Gen performance metrics. This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64 properties to configure a stream of metrics and returns a new fd usable with standard VFS system calls including read() to read typed and sized records; ioctl() to enable or disable capture and poll() to wait for data. A stream is opened something like: uint64_t properties[] = { /* Single context sampling */ DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle, /* Include OA reports in samples */ DRM_I915_PERF_PROP_SAMPLE_OA, true, /* OA unit configuration */ DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id, DRM_I915_PERF_PROP_OA_FORMAT, report_format, DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent, }; struct drm_i915_perf_open_param parm = { .flags = I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK | I915_PERF_FLAG_DISABLED, .properties_ptr = (uint64_t)properties, .num_properties = sizeof(properties) / 16, }; int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param); Records read all start with a common { type, size } header with DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records contain an extensible number of fields and it's the DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that determine what's included in every sample. No specific streams are supported yet so any attempt to open a stream will return an error. v2: use i915_gem_context_get() - Chris Wilson v3: update read() interface to avoid passing state struct - Chris Wilson fix some rebase fallout, with i915-perf init/deinit v4: s/DRM_IORW/DRM_IOW/ - Emil Velikov Signed-off-by: Robert Bragg <robert@sixbynine.org> Reviewed-by: Matthew Auld <matthew.auld@intel.com> Reviewed-by: Sourab Gupta <sourab.gupta@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-07 19:49:47 +00:00
#define DRM_I915_PERF_OPEN 0x36
drm/i915/perf: Implement I915_PERF_ADD/REMOVE_CONFIG interface The motivation behind this new interface is expose at runtime the creation of new OA configs which can be used as part of the i915 perf open interface. This will enable the kernel to learn new configs which may be experimental, or otherwise not part of the core set currently available through the i915 perf interface. v2: Drop DRM_ERROR for userspace errors (Matthew) Add padding to userspace structure (Matthew) s/guid/uuid/ (Matthew) v3: Use u32 instead of int to iterate through registers (Matthew) v4: Lock access to dynamic config list (Lionel) v5: by Matthew: Fix uninitialized error values Fix incorrect unwiding when opening perf stream Use kmalloc_array() to store register Use uuid_is_valid() to valid config uuids Declare ioctls as write only Check padding members are set to 0 by Lionel: Return ENOENT rather than EINVAL when trying to remove non existing config v6: by Chris: Use ref counts for OA configs Store UUID in drm_i915_perf_oa_config rather then using pointer Shuffle fields of drm_i915_perf_oa_config to avoid padding v7: by Chris Rename uapi pointers fields to end with '_ptr' v8: by Andrzej, Marek, Sebastian Update register whitelisting by Lionel Add more register names for documentation Allow configuration programming in non-paranoid mode Add support for value filter for a couple of registers already programmed in other part of the kernel v9: Documentation fix (Lionel) Allow writing WAIT_FOR_RC6_EXIT only on Gen8+ (Andrzej) v10: Perform read access_ok() on register pointers (Lionel) Signed-off-by: Matthew Auld <matthew.auld@intel.com> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Signed-off-by: Andrzej Datczuk <andrzej.datczuk@intel.com> Reviewed-by: Andrzej Datczuk <andrzej.datczuk@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20170803165812.2373-2-lionel.g.landwerlin@intel.com
2017-08-03 17:05:50 +00:00
#define DRM_I915_PERF_ADD_CONFIG 0x37
#define DRM_I915_PERF_REMOVE_CONFIG 0x38
drm/i915: add query uAPI There are a number of information that are readable from hardware registers and that we would like to make accessible to userspace. One particular example is the topology of the execution units (how are execution units grouped in subslices and slices and also which ones have been fused off for die recovery). At the moment the GET_PARAM ioctl covers some basic needs, but generally is only able to return a single value for each defined parameter. This is a bit problematic with topology descriptions which are array/maps of available units. This change introduces a new ioctl that can deal with requests to fill structures of potentially variable lengths. The user is expected fill a query with length fields set at 0 on the first call, the kernel then sets the length fields to the their expected values. A second call to the kernel with length fields at their expected values will trigger a copy of the data to the pointed memory locations. The scope of this uAPI is only to provide information to userspace, not to allow configuration of the device. v2: Simplify dispatcher code iteration (Tvrtko) Tweak uapi drm_i915_query_item structure (Tvrtko) v3: Rename pad fields into flags (Chris) Return error on flags field != 0 (Chris) Only copy length back to userspace in drm_i915_query_item (Chris) v4: Use array of functions instead of switch (Chris) v5: More comments in uapi (Tvrtko) Return query item errors in length field (All) v6: Tweak uapi comments style to match the coding style (Lionel) v7: Add i915_query.h (Joonas) v8: (Lionel) Change the behavior of the item iterator to report invalid queries into the query item rather than stopping the iteration. This enables userspace applications to query newer items on older kernels and only have failure on the items that are not supported. v9: Edit copyright headers (Joonas) v10: Typos & comments in uapi (Joonas) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Acked-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-6-lionel.g.landwerlin@intel.com
2018-03-06 12:28:56 +00:00
#define DRM_I915_QUERY 0x39
#define DRM_I915_GEM_VM_CREATE 0x3a
#define DRM_I915_GEM_VM_DESTROY 0x3b
drm/i915/uapi: introduce drm_i915_gem_create_ext Same old gem_create but with now with extensions support. This is needed to support various upcoming usecases. v2:(Chris) - Use separate ioctl number for gem_create_ext, instead of hijacking the existing gem_create ioctl, otherwise we run into the issue with being unable to detect if the kernel supports the new extension behaviour. - We now have gem_create_ext.flags, which should be zeroed. - I915_GEM_CREATE_EXT_SETPARAM value is now zero, since this is the index into our array of extensions. - Setup a "vanilla" object which we can directly apply our extensions to. v3:(Daniel & Jason) - drop I915_GEM_CREATE_EXT_SETPARAM. Instead just have each extension do one thing only, instead of generic setparam which can cover various use cases. - add some kernel-doc. Signed-off-by: Matthew Auld <matthew.auld@intel.com> Signed-off-by: CQ Tang <cq.tang@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> Cc: Jordan Justen <jordan.l.justen@intel.com> Cc: Daniel Vetter <daniel.vetter@intel.com> Cc: Kenneth Graunke <kenneth@whitecape.org> Cc: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@gmail.com> Cc: dri-devel@lists.freedesktop.org Cc: mesa-dev@lists.freedesktop.org Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Link: https://patchwork.freedesktop.org/patch/msgid/20210429103056.407067-5-matthew.auld@intel.com
2021-04-29 10:30:52 +00:00
#define DRM_I915_GEM_CREATE_EXT 0x3c
/* Must be kept compact -- no holes */
#define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
#define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
#define DRM_IOCTL_I915_FLIP DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLIP)
#define DRM_IOCTL_I915_BATCHBUFFER DRM_IOW( DRM_COMMAND_BASE + DRM_I915_BATCHBUFFER, drm_i915_batchbuffer_t)
#define DRM_IOCTL_I915_IRQ_EMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_IRQ_EMIT, drm_i915_irq_emit_t)
#define DRM_IOCTL_I915_IRQ_WAIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_IRQ_WAIT, drm_i915_irq_wait_t)
#define DRM_IOCTL_I915_GETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GETPARAM, drm_i915_getparam_t)
#define DRM_IOCTL_I915_SETPARAM DRM_IOW( DRM_COMMAND_BASE + DRM_I915_SETPARAM, drm_i915_setparam_t)
#define DRM_IOCTL_I915_ALLOC DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_ALLOC, drm_i915_mem_alloc_t)
#define DRM_IOCTL_I915_FREE DRM_IOW( DRM_COMMAND_BASE + DRM_I915_FREE, drm_i915_mem_free_t)
#define DRM_IOCTL_I915_INIT_HEAP DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT_HEAP, drm_i915_mem_init_heap_t)
#define DRM_IOCTL_I915_CMDBUFFER DRM_IOW( DRM_COMMAND_BASE + DRM_I915_CMDBUFFER, drm_i915_cmdbuffer_t)
#define DRM_IOCTL_I915_DESTROY_HEAP DRM_IOW( DRM_COMMAND_BASE + DRM_I915_DESTROY_HEAP, drm_i915_mem_destroy_heap_t)
#define DRM_IOCTL_I915_SET_VBLANK_PIPE DRM_IOW( DRM_COMMAND_BASE + DRM_I915_SET_VBLANK_PIPE, drm_i915_vblank_pipe_t)
#define DRM_IOCTL_I915_GET_VBLANK_PIPE DRM_IOR( DRM_COMMAND_BASE + DRM_I915_GET_VBLANK_PIPE, drm_i915_vblank_pipe_t)
#define DRM_IOCTL_I915_VBLANK_SWAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_VBLANK_SWAP, drm_i915_vblank_swap_t)
#define DRM_IOCTL_I915_HWS_ADDR DRM_IOW(DRM_COMMAND_BASE + DRM_I915_HWS_ADDR, struct drm_i915_gem_init)
#define DRM_IOCTL_I915_GEM_INIT DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
#define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
#define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
#define DRM_IOCTL_I915_GEM_EXECBUFFER2_WR DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2_WR, struct drm_i915_gem_execbuffer2)
#define DRM_IOCTL_I915_GEM_PIN DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
#define DRM_IOCTL_I915_GEM_UNPIN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
#define DRM_IOCTL_I915_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
#define DRM_IOCTL_I915_GEM_SET_CACHING DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_SET_CACHING, struct drm_i915_gem_caching)
#define DRM_IOCTL_I915_GEM_GET_CACHING DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_GET_CACHING, struct drm_i915_gem_caching)
#define DRM_IOCTL_I915_GEM_THROTTLE DRM_IO ( DRM_COMMAND_BASE + DRM_I915_GEM_THROTTLE)
#define DRM_IOCTL_I915_GEM_ENTERVT DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_ENTERVT)
#define DRM_IOCTL_I915_GEM_LEAVEVT DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_LEAVEVT)
#define DRM_IOCTL_I915_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE, struct drm_i915_gem_create)
drm/i915/uapi: introduce drm_i915_gem_create_ext Same old gem_create but with now with extensions support. This is needed to support various upcoming usecases. v2:(Chris) - Use separate ioctl number for gem_create_ext, instead of hijacking the existing gem_create ioctl, otherwise we run into the issue with being unable to detect if the kernel supports the new extension behaviour. - We now have gem_create_ext.flags, which should be zeroed. - I915_GEM_CREATE_EXT_SETPARAM value is now zero, since this is the index into our array of extensions. - Setup a "vanilla" object which we can directly apply our extensions to. v3:(Daniel & Jason) - drop I915_GEM_CREATE_EXT_SETPARAM. Instead just have each extension do one thing only, instead of generic setparam which can cover various use cases. - add some kernel-doc. Signed-off-by: Matthew Auld <matthew.auld@intel.com> Signed-off-by: CQ Tang <cq.tang@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> Cc: Jordan Justen <jordan.l.justen@intel.com> Cc: Daniel Vetter <daniel.vetter@intel.com> Cc: Kenneth Graunke <kenneth@whitecape.org> Cc: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@gmail.com> Cc: dri-devel@lists.freedesktop.org Cc: mesa-dev@lists.freedesktop.org Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Link: https://patchwork.freedesktop.org/patch/msgid/20210429103056.407067-5-matthew.auld@intel.com
2021-04-29 10:30:52 +00:00
#define DRM_IOCTL_I915_GEM_CREATE_EXT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE_EXT, struct drm_i915_gem_create_ext)
#define DRM_IOCTL_I915_GEM_PREAD DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PREAD, struct drm_i915_gem_pread)
#define DRM_IOCTL_I915_GEM_PWRITE DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, struct drm_i915_gem_pwrite)
#define DRM_IOCTL_I915_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap)
#define DRM_IOCTL_I915_GEM_MMAP_GTT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_GTT, struct drm_i915_gem_mmap_gtt)
#define DRM_IOCTL_I915_GEM_MMAP_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_GTT, struct drm_i915_gem_mmap_offset)
#define DRM_IOCTL_I915_GEM_SET_DOMAIN DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SET_DOMAIN, struct drm_i915_gem_set_domain)
#define DRM_IOCTL_I915_GEM_SW_FINISH DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SW_FINISH, struct drm_i915_gem_sw_finish)
#define DRM_IOCTL_I915_GEM_SET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_SET_TILING, struct drm_i915_gem_set_tiling)
#define DRM_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct drm_i915_gem_get_tiling)
#define DRM_IOCTL_I915_GEM_GET_APERTURE DRM_IOR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_APERTURE, struct drm_i915_gem_get_aperture)
#define DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_PIPE_FROM_CRTC_ID, struct drm_i915_get_pipe_from_crtc_id)
#define DRM_IOCTL_I915_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise)
#define DRM_IOCTL_I915_OVERLAY_PUT_IMAGE DRM_IOW(DRM_COMMAND_BASE + DRM_I915_OVERLAY_PUT_IMAGE, struct drm_intel_overlay_put_image)
#define DRM_IOCTL_I915_OVERLAY_ATTRS DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_OVERLAY_ATTRS, struct drm_intel_overlay_attrs)
#define DRM_IOCTL_I915_SET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_SET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey)
#define DRM_IOCTL_I915_GET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey)
#define DRM_IOCTL_I915_GEM_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_WAIT, struct drm_i915_gem_wait)
#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create)
#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create_ext)
#define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
#define DRM_IOCTL_I915_REG_READ DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
#define DRM_IOCTL_I915_GET_RESET_STATS DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl By exporting the ability to map user address and inserting PTEs representing their backing pages into the GTT, we can exploit UMA in order to utilize normal application data as a texture source or even as a render target (depending upon the capabilities of the chipset). This has a number of uses, with zero-copy downloads to the GPU and efficient readback making the intermixed streaming of CPU and GPU operations fairly efficient. This ability has many widespread implications from faster rendering of client-side software rasterisers (chromium), mitigation of stalls due to read back (firefox) and to faster pipelining of texture data (such as pixel buffer objects in GL or data blobs in CL). v2: Compile with CONFIG_MMU_NOTIFIER v3: We can sleep while performing invalidate-range, which we can utilise to drop our page references prior to the kernel manipulating the vma (for either discard or cloning) and so protect normal users. v4: Only run the invalidate notifier if the range intercepts the bo. v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers v6: Recheck after reacquire mutex for lost mmu. v7: Fix implicit padding of ioctl struct by rounding to next 64bit boundary. v8: Fix rebasing error after forwarding porting the back port. v9: Limit the userptr to page aligned entries. We now expect userspace to handle all the offset-in-page adjustments itself. v10: Prevent vma from being copied across fork to avoid issues with cow. v11: Drop vma behaviour changes -- locking is nigh on impossible. Use a worker to load user pages to avoid lock inversions. v12: Use get_task_mm()/mmput() for correct refcounting of mm. v13: Use a worker to release the mmu_notifier to avoid lock inversion v14: Decouple mmu_notifier from struct_mutex using a custom mmu_notifer with its own locking and tree of objects for each mm/mmu_notifier. v15: Prevent overlapping userptr objects, and invalidate all objects within the mmu_notifier range v16: Fix a typo for iterating over multiple objects in the range and rearrange error path to destroy the mmu_notifier locklessly. Also close a race between invalidate_range and the get_pages_worker. v17: Close a race between get_pages_worker/invalidate_range and fresh allocations of the same userptr range - and notice that struct_mutex was presumed to be held when during creation it wasn't. v18: Sigh. Fix the refactor of st_set_pages() to allocate enough memory for the struct sg_table and to clear it before reporting an error. v19: Always error out on read-only userptr requests as we don't have the hardware infrastructure to support them at the moment. v20: Refuse to implement read-only support until we have the required infrastructure - but reserve the bit in flags for future use. v21: use_mm() is not required for get_user_pages(). It is only meant to be used to fix up the kernel thread's current->mm for use with copy_user(). v22: Use sg_alloc_table_from_pages for that chunky feeling v23: Export a function for sanity checking dma-buf rather than encode userptr details elsewhere, and clean up comments based on suggestions by Bradley. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com> Cc: Akash Goel <akash.goel@intel.com> Cc: "Volkin, Bradley D" <bradley.d.volkin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Reviewed-by: Brad Volkin <bradley.d.volkin@intel.com> [danvet: Frob ioctl allocation to pick the next one - will cause a bit of fuss with create2 apparently, but such are the rules.] [danvet2: oops, forgot to git add after manual patch application] [danvet3: Appease sparse.] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-05-16 13:22:37 +00:00
#define DRM_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
#define DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_GETPARAM, struct drm_i915_gem_context_param)
#define DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_SETPARAM, struct drm_i915_gem_context_param)
drm/i915: Add i915 perf infrastructure Adds base i915 perf infrastructure for Gen performance metrics. This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64 properties to configure a stream of metrics and returns a new fd usable with standard VFS system calls including read() to read typed and sized records; ioctl() to enable or disable capture and poll() to wait for data. A stream is opened something like: uint64_t properties[] = { /* Single context sampling */ DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle, /* Include OA reports in samples */ DRM_I915_PERF_PROP_SAMPLE_OA, true, /* OA unit configuration */ DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id, DRM_I915_PERF_PROP_OA_FORMAT, report_format, DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent, }; struct drm_i915_perf_open_param parm = { .flags = I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK | I915_PERF_FLAG_DISABLED, .properties_ptr = (uint64_t)properties, .num_properties = sizeof(properties) / 16, }; int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param); Records read all start with a common { type, size } header with DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records contain an extensible number of fields and it's the DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that determine what's included in every sample. No specific streams are supported yet so any attempt to open a stream will return an error. v2: use i915_gem_context_get() - Chris Wilson v3: update read() interface to avoid passing state struct - Chris Wilson fix some rebase fallout, with i915-perf init/deinit v4: s/DRM_IORW/DRM_IOW/ - Emil Velikov Signed-off-by: Robert Bragg <robert@sixbynine.org> Reviewed-by: Matthew Auld <matthew.auld@intel.com> Reviewed-by: Sourab Gupta <sourab.gupta@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-07 19:49:47 +00:00
#define DRM_IOCTL_I915_PERF_OPEN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param)
drm/i915/perf: Implement I915_PERF_ADD/REMOVE_CONFIG interface The motivation behind this new interface is expose at runtime the creation of new OA configs which can be used as part of the i915 perf open interface. This will enable the kernel to learn new configs which may be experimental, or otherwise not part of the core set currently available through the i915 perf interface. v2: Drop DRM_ERROR for userspace errors (Matthew) Add padding to userspace structure (Matthew) s/guid/uuid/ (Matthew) v3: Use u32 instead of int to iterate through registers (Matthew) v4: Lock access to dynamic config list (Lionel) v5: by Matthew: Fix uninitialized error values Fix incorrect unwiding when opening perf stream Use kmalloc_array() to store register Use uuid_is_valid() to valid config uuids Declare ioctls as write only Check padding members are set to 0 by Lionel: Return ENOENT rather than EINVAL when trying to remove non existing config v6: by Chris: Use ref counts for OA configs Store UUID in drm_i915_perf_oa_config rather then using pointer Shuffle fields of drm_i915_perf_oa_config to avoid padding v7: by Chris Rename uapi pointers fields to end with '_ptr' v8: by Andrzej, Marek, Sebastian Update register whitelisting by Lionel Add more register names for documentation Allow configuration programming in non-paranoid mode Add support for value filter for a couple of registers already programmed in other part of the kernel v9: Documentation fix (Lionel) Allow writing WAIT_FOR_RC6_EXIT only on Gen8+ (Andrzej) v10: Perform read access_ok() on register pointers (Lionel) Signed-off-by: Matthew Auld <matthew.auld@intel.com> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Signed-off-by: Andrzej Datczuk <andrzej.datczuk@intel.com> Reviewed-by: Andrzej Datczuk <andrzej.datczuk@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20170803165812.2373-2-lionel.g.landwerlin@intel.com
2017-08-03 17:05:50 +00:00
#define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
#define DRM_IOCTL_I915_PERF_REMOVE_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
drm/i915: add query uAPI There are a number of information that are readable from hardware registers and that we would like to make accessible to userspace. One particular example is the topology of the execution units (how are execution units grouped in subslices and slices and also which ones have been fused off for die recovery). At the moment the GET_PARAM ioctl covers some basic needs, but generally is only able to return a single value for each defined parameter. This is a bit problematic with topology descriptions which are array/maps of available units. This change introduces a new ioctl that can deal with requests to fill structures of potentially variable lengths. The user is expected fill a query with length fields set at 0 on the first call, the kernel then sets the length fields to the their expected values. A second call to the kernel with length fields at their expected values will trigger a copy of the data to the pointed memory locations. The scope of this uAPI is only to provide information to userspace, not to allow configuration of the device. v2: Simplify dispatcher code iteration (Tvrtko) Tweak uapi drm_i915_query_item structure (Tvrtko) v3: Rename pad fields into flags (Chris) Return error on flags field != 0 (Chris) Only copy length back to userspace in drm_i915_query_item (Chris) v4: Use array of functions instead of switch (Chris) v5: More comments in uapi (Tvrtko) Return query item errors in length field (All) v6: Tweak uapi comments style to match the coding style (Lionel) v7: Add i915_query.h (Joonas) v8: (Lionel) Change the behavior of the item iterator to report invalid queries into the query item rather than stopping the iteration. This enables userspace applications to query newer items on older kernels and only have failure on the items that are not supported. v9: Edit copyright headers (Joonas) v10: Typos & comments in uapi (Joonas) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Acked-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-6-lionel.g.landwerlin@intel.com
2018-03-06 12:28:56 +00:00
#define DRM_IOCTL_I915_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query)
#define DRM_IOCTL_I915_GEM_VM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_CREATE, struct drm_i915_gem_vm_control)
#define DRM_IOCTL_I915_GEM_VM_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control)
/* Allow drivers to submit batchbuffers directly to hardware, relying
* on the security mechanisms provided by hardware.
*/
typedef struct drm_i915_batchbuffer {
int start; /* agp offset */
int used; /* nr bytes in use */
int DR1; /* hw flags for GFX_OP_DRAWRECT_INFO */
int DR4; /* window origin for GFX_OP_DRAWRECT_INFO */
int num_cliprects; /* mulitpass with multiple cliprects? */
struct drm_clip_rect __user *cliprects; /* pointer to userspace cliprects */
} drm_i915_batchbuffer_t;
/* As above, but pass a pointer to userspace buffer which can be
* validated by the kernel prior to sending to hardware.
*/
typedef struct _drm_i915_cmdbuffer {
char __user *buf; /* pointer to userspace command buffer */
int sz; /* nr bytes in buf */
int DR1; /* hw flags for GFX_OP_DRAWRECT_INFO */
int DR4; /* window origin for GFX_OP_DRAWRECT_INFO */
int num_cliprects; /* mulitpass with multiple cliprects? */
struct drm_clip_rect __user *cliprects; /* pointer to userspace cliprects */
} drm_i915_cmdbuffer_t;
/* Userspace can request & wait on irq's:
*/
typedef struct drm_i915_irq_emit {
int __user *irq_seq;
} drm_i915_irq_emit_t;
typedef struct drm_i915_irq_wait {
int irq_seq;
} drm_i915_irq_wait_t;
/*
* Different modes of per-process Graphics Translation Table,
* see I915_PARAM_HAS_ALIASING_PPGTT
*/
#define I915_GEM_PPGTT_NONE 0
#define I915_GEM_PPGTT_ALIASING 1
#define I915_GEM_PPGTT_FULL 2
/* Ioctl to query kernel params:
*/
#define I915_PARAM_IRQ_ACTIVE 1
#define I915_PARAM_ALLOW_BATCHBUFFER 2
#define I915_PARAM_LAST_DISPATCH 3
#define I915_PARAM_CHIPSET_ID 4
#define I915_PARAM_HAS_GEM 5
#define I915_PARAM_NUM_FENCES_AVAIL 6
#define I915_PARAM_HAS_OVERLAY 7
#define I915_PARAM_HAS_PAGEFLIPPING 8
#define I915_PARAM_HAS_EXECBUF2 9
#define I915_PARAM_HAS_BSD 10
#define I915_PARAM_HAS_BLT 11
#define I915_PARAM_HAS_RELAXED_FENCING 12
#define I915_PARAM_HAS_COHERENT_RINGS 13
#define I915_PARAM_HAS_EXEC_CONSTANTS 14
#define I915_PARAM_HAS_RELAXED_DELTA 15
#define I915_PARAM_HAS_GEN7_SOL_RESET 16
#define I915_PARAM_HAS_LLC 17
#define I915_PARAM_HAS_ALIASING_PPGTT 18
#define I915_PARAM_HAS_WAIT_TIMEOUT 19
#define I915_PARAM_HAS_SEMAPHORES 20
#define I915_PARAM_HAS_PRIME_VMAP_FLUSH 21
#define I915_PARAM_HAS_VEBOX 22
#define I915_PARAM_HAS_SECURE_BATCHES 23
#define I915_PARAM_HAS_PINNED_BATCHES 24
#define I915_PARAM_HAS_EXEC_NO_RELOC 25
#define I915_PARAM_HAS_EXEC_HANDLE_LUT 26
#define I915_PARAM_HAS_WT 27
#define I915_PARAM_CMD_PARSER_VERSION 28
#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29
drm/i915: Support creation of unbound wc user mappings for objects This patch provides support to create write-combining virtual mappings of GEM object. It intends to provide the same funtionality of 'mmap_gtt' interface without the constraints and contention of a limited aperture space, but requires clients handles the linear to tile conversion on their own. This is for improving the CPU write operation performance, as with such mapping, writes and reads are almost 50% faster than with mmap_gtt. Similar to the GTT mmapping, unlike the regular CPU mmapping, it avoids the cache flush after update from CPU side, when object is passed onto GPU. This type of mapping is specially useful in case of sub-region update, i.e. when only a portion of the object is to be updated. Using a CPU mmap in such cases would normally incur a clflush of the whole object, and using a GTT mmapping would likely require eviction of an active object or fence and thus stall. The write-combining CPU mmap avoids both. To ensure the cache coherency, before using this mapping, the GTT domain has been reused here. This provides the required cache flush if the object is in CPU domain or synchronization against the concurrent rendering. Although the access through an uncached mmap should automatically invalidate the cache lines, this may not be true for non-temporal write instructions and also not all pages of the object may be updated at any given point of time through this mapping. Having a call to get_pages in set_to_gtt_domain function, as added in the earlier patch 'drm/i915: Broaden application of set-domain(GTT)', would guarantee the clflush and so there will be no cachelines holding the data for the object before it is accessed through this map. The drm_i915_gem_mmap structure (for the DRM_I915_GEM_MMAP_IOCTL) has been extended with a new flags field (defaulting to 0 for existent users). In order for userspace to detect the extended ioctl, a new parameter I915_PARAM_MMAP_VERSION has been added for versioning the ioctl interface. v2: Fix error handling, invalid flag detection, renaming (ickle) v3: Rebase to latest drm-intel-nightly codebase The new mmapping is exercised by igt/gem_mmap_wc, igt/gem_concurrent_blit and igt/gem_gtt_speed. Change-Id: Ie883942f9e689525f72fe9a8d3780c3a9faa769a Signed-off-by: Akash Goel <akash.goel@intel.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-01-02 10:59:30 +00:00
#define I915_PARAM_MMAP_VERSION 30
#define I915_PARAM_HAS_BSD2 31
#define I915_PARAM_REVISION 32
#define I915_PARAM_SUBSLICE_TOTAL 33
#define I915_PARAM_EU_TOTAL 34
#define I915_PARAM_HAS_GPU_RESET 35
#define I915_PARAM_HAS_RESOURCE_STREAMER 36
drm/i915: Add soft-pinning API for execbuffer Userspace can pass in an offset that it presumes the object is located at. The kernel will then do its utmost to fit the object into that location. The assumption is that userspace is handling its own object locations (for example along with full-ppgtt) and that the kernel will rarely have to make space for the user's requests. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris Wilson. Fixed incorrect error paths causing crash found by Michal Winiarski. (Not published externally) v3: Rebased because of trivial conflict in object_bind_to_vm. Fixed eviction to allow eviction of soft-pinned objects when another soft-pinned object used by a subsequent execbuffer overlaps reported by Michal Winiarski. (Not published externally) v4: Moved soft-pinned objects to the front of ordered_vmas so that they are pinned first after an address conflict happens to avoid repeated conflicts in rare cases (Suggested by Chris Wilson). Expanded comment on drm_i915_gem_exec_object2.offset to cover this new API. v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this capability (Kristian). Added check for multiple pinnings on eviction (Akash). Made sure buffers are not considered misplaced without the user specifying EXEC_OBJECT_SUPPORTS_48B_ADDRESS. User must assume responsibility for any addressing workarounds. Updated object2.offset field comment again to clarify NO_RELOC case (Chris). checkpatch cleanup. v6: Trivial rebase on latest drm-intel-nightly v7: Catch attempts to pin above the max virtual address size and return EINVAL (Tvrtko). Decouple EXEC_OBJECT_SUPPORTS_48B_ADDRESS and EXEC_OBJECT_PINNED flags, user must pass both flags in any attempt to pin something at an offset above 4GB (Chris, Daniel Vetter). Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Akash Goel <akash.goel@intel.com> Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com> Cc: Michal Winiarski <michal.winiarski@intel.com> Cc: Zou Nanhai <nanhai.zou@intel.com> Cc: Kristian Høgsberg <hoegsberg@gmail.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Reviewed-by: Michel Thierry <michel.thierry@intel.com> Acked-by: PDT Signed-off-by: Thomas Daniel <thomas.daniel@intel.com> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1449575707-20933-1-git-send-email-thomas.daniel@intel.com
2015-12-08 11:55:07 +00:00
#define I915_PARAM_HAS_EXEC_SOFTPIN 37
#define I915_PARAM_HAS_POOLED_EU 38
#define I915_PARAM_MIN_EU_IN_POOL 39
#define I915_PARAM_MMAP_GTT_VERSION 40
/*
* Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution
* priorities and the driver will attempt to execute batches in priority order.
* The param returns a capability bitmask, nonzero implies that the scheduler
* is enabled, with different features present according to the mask.
drm/i915/scheduler: Support user-defined priorities Use a priority stored in the context as the initial value when submitting a request. This allows us to change the default priority on a per-context basis, allowing different contexts to be favoured with GPU time at the expense of lower importance work. The user can adjust the context's priority via I915_CONTEXT_PARAM_PRIORITY, with more positive values being higher priority (they will be serviced earlier, after their dependencies have been resolved). Any prerequisite work for an execbuf will have its priority raised to match the new request as required. Normal users can specify any value in the range of -1023 to 0 [default], i.e. they can reduce the priority of their workloads (and temporarily boost it back to normal if so desired). Privileged users can specify any value in the range of -1023 to 1023, [default is 0], i.e. they can raise their priority above all overs and so potentially starve the system. Note that the existing schedulers are not fair, nor load balancing, the execution is strictly by priority on a first-come, first-served basis, and the driver may choose to boost some requests above the range available to users. This priority was originally based around nice(2), but evolved to allow clients to adjust their priority within a small range, and allow for a privileged high priority range. For example, this can be used to implement EGL_IMG_context_priority https://www.khronos.org/registry/egl/extensions/IMG/EGL_IMG_context_priority.txt EGL_CONTEXT_PRIORITY_LEVEL_IMG determines the priority level of the context to be created. This attribute is a hint, as an implementation may not support multiple contexts at some priority levels and system policy may limit access to high priority contexts to appropriate system privilege level. The default value for EGL_CONTEXT_PRIORITY_LEVEL_IMG is EGL_CONTEXT_PRIORITY_MEDIUM_IMG." so we can map PRIORITY_HIGH -> 1023 [privileged, will failback to 0] PRIORITY_MED -> 0 [default] PRIORITY_LOW -> -1023 They also map onto the priorities used by VkQueue (and a VkQueue is essentially a timeline, our i915_gem_context under full-ppgtt). v2: s/CAP_SYS_ADMIN/CAP_SYS_NICE/ v3: Report min/max user priorities as defines in the uapi, and rebase internal priorities on the exposed values. Testcase: igt/gem_exec_schedule Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20171003203453.15692-9-chris@chris-wilson.co.uk
2017-10-03 20:34:53 +00:00
*
* The initial priority for each batch is supplied by the context and is
* controlled via I915_CONTEXT_PARAM_PRIORITY.
*/
#define I915_PARAM_HAS_SCHEDULER 41
#define I915_SCHEDULER_CAP_ENABLED (1ul << 0)
#define I915_SCHEDULER_CAP_PRIORITY (1ul << 1)
#define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2)
drm/i915: Use HW semaphores for inter-engine synchronisation on gen8+ Having introduced per-context seqno, we now have a means to identity progress across the system without feel of rollback as befell the global_seqno. That is we can program a MI_SEMAPHORE_WAIT operation in advance of submission safe in the knowledge that our target seqno and address is stable. However, since we are telling the GPU to busy-spin on the target address until it matches the signaling seqno, we only want to do so when we are sure that busy-spin will be completed quickly. To achieve this we only submit the request to HW once the signaler is itself executing (modulo preemption causing us to wait longer), and we only do so for default and above priority requests (so that idle priority tasks never themselves hog the GPU waiting for others). As might be reasonably expected, HW semaphores excel in inter-engine synchronisation microbenchmarks (where the 3x reduced latency / increased throughput more than offset the power cost of spinning on a second ring) and have significant improvement (can be up to ~10%, most see no change) for single clients that utilize multiple engines (typically media players and transcoders), without regressing multiple clients that can saturate the system or changing the power envelope dramatically. v3: Drop the older NEQ branch, now we pin the signaler's HWSP anyway. v4: Tell the world and include it as part of scheduler caps. Testcase: igt/gem_exec_whisper Testcase: igt/benchmarks/gem_wsim Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190301170901.8340-3-chris@chris-wilson.co.uk
2019-03-01 17:09:00 +00:00
#define I915_SCHEDULER_CAP_SEMAPHORES (1ul << 3)
#define I915_SCHEDULER_CAP_ENGINE_BUSY_STATS (1ul << 4)
/*
* Indicates the 2k user priority levels are statically mapped into 3 buckets as
* follows:
*
* -1k to -1 Low priority
* 0 Normal priority
* 1 to 1k Highest priority
*/
#define I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP (1ul << 5)
/*
* Query the status of HuC load.
*
* The query can fail in the following scenarios with the listed error codes:
* -ENODEV if HuC is not present on this platform,
* -EOPNOTSUPP if HuC firmware usage is disabled,
* -ENOPKG if HuC firmware fetch failed,
* -ENOEXEC if HuC firmware is invalid or mismatched,
* -ENOMEM if i915 failed to prepare the FW objects for transfer to the uC,
* -EIO if the FW transfer or the FW authentication failed.
*
* If the IOCTL is successful, the returned parameter will be set to one of the
* following values:
* * 0 if HuC firmware load is not complete,
* * 1 if HuC firmware is loaded and fully authenticated,
* * 2 if HuC firmware is loaded and authenticated for clear media only
*/
#define I915_PARAM_HUC_STATUS 42
drm/i915: Enable userspace to opt-out of implicit fencing Userspace is faced with a dilemma. The kernel requires implicit fencing to manage resource usage (we always must wait for the GPU to finish before releasing its PTE) and for third parties. However, userspace may wish to avoid this serialisation if it is either using explicit fencing between parties and wants more fine-grained access to buffers (e.g. it may partition the buffer between uses and track fences on ranges rather than the implicit fences tracking the whole object). It follows that userspace needs a mechanism to avoid the kernel's serialisation on its implicit fences before execbuf execution. The next question is whether this is an object, execbuf or context flag. Hybrid users (such as using explicit EGL_ANDROID_native_sync fencing on shared winsys buffers, but implicit fencing on internal surfaces) require a per-object level flag. Given that this flag need to be only set once for the lifetime of the object, this reduces the convenience of having an execbuf or context level flag (and avoids having multiple pieces of uABI controlling the same feature). Incorrect use of this flag will result in rendering corruption and GPU hangs - but will not result in use-after-free or similar resource tracking issues. Serious caveat: write ordering is not strictly correct after setting this flag on a render target on multiple engines. This affects all subsequent GEM operations (execbuf, set-domain, pread) and shared dma-buf operations. A fix is possible - but costly (both in terms of further ABI changes and runtime overhead). Testcase: igt/gem_exec_async Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Acked-by: Chad Versace <chadversary@chromium.org> Link: http://patchwork.freedesktop.org/patch/msgid/20170127094008.27489-1-chris@chris-wilson.co.uk
2017-01-27 09:40:07 +00:00
/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of
* synchronisation with implicit fencing on individual objects.
* See EXEC_OBJECT_ASYNC.
*/
#define I915_PARAM_HAS_EXEC_ASYNC 43
/* Query whether DRM_I915_GEM_EXECBUFFER2 supports explicit fence support -
* both being able to pass in a sync_file fd to wait upon before executing,
* and being able to return a new sync_file fd that is signaled when the
* current request is complete. See I915_EXEC_FENCE_IN and I915_EXEC_FENCE_OUT.
*/
#define I915_PARAM_HAS_EXEC_FENCE 44
/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture
* user-specified buffers for post-mortem debugging of GPU hangs. See
* EXEC_OBJECT_CAPTURE.
*/
#define I915_PARAM_HAS_EXEC_CAPTURE 45
#define I915_PARAM_SLICE_MASK 46
/* Assuming it's uniform for each slice, this queries the mask of subslices
* per-slice for this system.
*/
#define I915_PARAM_SUBSLICE_MASK 47
/*
* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying the batch buffer
* as the first execobject as opposed to the last. See I915_EXEC_BATCH_FIRST.
*/
#define I915_PARAM_HAS_EXEC_BATCH_FIRST 48
/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
* drm_i915_gem_exec_fence structures. See I915_EXEC_FENCE_ARRAY.
*/
#define I915_PARAM_HAS_EXEC_FENCE_ARRAY 49
/*
* Query whether every context (both per-file default and user created) is
* isolated (insofar as HW supports). If this parameter is not true, then
* freshly created contexts may inherit values from an existing context,
* rather than default HW values. If true, it also ensures (insofar as HW
* supports) that all state set by this context will not leak to any other
* context.
*
* As not every engine across every gen support contexts, the returned
* value reports the support of context isolation for individual engines by
* returning a bitmask of each engine class set to true if that class supports
* isolation.
*/
#define I915_PARAM_HAS_CONTEXT_ISOLATION 50
/* Frequency of the command streamer timestamps given by the *_TIMESTAMP
* registers. This used to be fixed per platform but from CNL onwards, this
* might vary depending on the parts.
*/
#define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51
/*
* Once upon a time we supposed that writes through the GGTT would be
* immediately in physical memory (once flushed out of the CPU path). However,
* on a few different processors and chipsets, this is not necessarily the case
* as the writes appear to be buffered internally. Thus a read of the backing
* storage (physical memory) via a different path (with different physical tags
* to the indirect write via the GGTT) will see stale values from before
* the GGTT write. Inside the kernel, we can for the most part keep track of
* the different read/write domains in use (e.g. set-domain), but the assumption
* of coherency is baked into the ABI, hence reporting its true state in this
* parameter.
*
* Reports true when writes via mmap_gtt are immediately visible following an
* lfence to flush the WCB.
*
* Reports false when writes via mmap_gtt are indeterminately delayed in an in
* internal buffer and are _not_ immediately visible to third parties accessing
* directly via mmap_cpu/mmap_wc. Use of mmap_gtt as part of an IPC
* communications channel when reporting false is strongly disadvised.
*/
#define I915_PARAM_MMAP_GTT_COHERENT 52
2019-05-21 21:11:34 +00:00
/*
* Query whether DRM_I915_GEM_EXECBUFFER2 supports coordination of parallel
* execution through use of explicit fence support.
* See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT.
*/
#define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53
/*
* Revision of the i915-perf uAPI. The value returned helps determine what
* i915-perf features are available. See drm_i915_perf_property_id.
*/
#define I915_PARAM_PERF_REVISION 54
/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
* timeline syncobj through drm_i915_gem_execbuffer_ext_timeline_fences. See
* I915_EXEC_USE_EXTENSIONS.
*/
#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
drm/i915/userptr: Probe existence of backing struct pages upon creation Jason Ekstrand requested a more efficient method than userptr+set-domain to determine if the userptr object was backed by a complete set of pages upon creation. To be more efficient than simply populating the userptr using get_user_pages() (as done by the call to set-domain or execbuf), we can walk the tree of vm_area_struct and check for gaps or vma not backed by struct page (VM_PFNMAP). The question is how to handle VM_MIXEDMAP which may be either struct page or pfn backed... With discrete we are going to drop support for set_domain(), so offering a way to probe the pages, without having to resort to dummy batches has been requested. v2: - add new query param for the PROBE flag, so userspace can easily check if the kernel supports it(Jason). - use mmap_read_{lock, unlock}. - add some kernel-doc. v3: - In the docs also mention that PROBE doesn't guarantee that the pages will remain valid by the time they are actually used(Tvrtko). - Add a small comment for the hole finding logic(Jason). - Move the param next to all the other params which just return true. Testcase: igt/gem_userptr_blits/probe Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Matthew Auld <matthew.auld@intel.com> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Cc: Jordan Justen <jordan.l.justen@intel.com> Cc: Kenneth Graunke <kenneth@whitecape.org> Cc: Jason Ekstrand <jason@jlekstrand.net> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Ramalingam C <ramalingam.c@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Acked-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20210723113405.427004-1-matthew.auld@intel.com
2021-07-23 11:34:05 +00:00
/* Query if the kernel supports the I915_USERPTR_PROBE flag. */
#define I915_PARAM_HAS_USERPTR_PROBE 56
/*
* Frequency of the timestamps in OA reports. This used to be the same as the CS
* timestamp frequency, but differs on some platforms.
*/
#define I915_PARAM_OA_TIMESTAMP_FREQUENCY 57
/*
* Query the status of PXP support in i915.
*
* The query can fail in the following scenarios with the listed error codes:
* -ENODEV = PXP support is not available on the GPU device or in the
* kernel due to missing component drivers or kernel configs.
*
* If the IOCTL is successful, the returned parameter will be set to one of
* the following values:
* 1 = PXP feature is supported and is ready for use.
* 2 = PXP feature is supported but should be ready soon (pending
* initialization of non-i915 system dependencies).
*
* NOTE: When param is supported (positive return values), user space should
* still refer to the GEM PXP context-creation UAPI header specs to be
* aware of possible failure due to system state machine at the time.
*/
#define I915_PARAM_PXP_STATUS 58
drm/i915/guc: Use context hints for GT frequency Allow user to provide a low latency context hint. When set, KMD sends a hint to GuC which results in special handling for this context. SLPC will ramp the GT frequency aggressively every time it switches to this context. The down freq threshold will also be lower so GuC will ramp down the GT freq for this context more slowly. We also disable waitboost for this context as that will interfere with the strategy. We need to enable the use of SLPC Compute strategy during init, but it will apply only to contexts that set this bit during context creation. Userland can check whether this feature is supported using a new param- I915_PARAM_HAS_CONTEXT_FREQ_HINT. This flag is true for all guc submission enabled platforms as they use SLPC for frequency management. The Mesa usage model for this flag is here - https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint v2: Rename flags as per review suggestions (Rodrigo, Tvrtko). Also, use flag bits in intel_context as it allows finer control for toggling per engine if needed (Tvrtko). v3: Minor review comments (Tvrtko) v4: Update comment (Sushma) Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Cc: Sushma Venkatesh Reddy <sushma.venkatesh.reddy@intel.com> Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com> Acked-by: Ivan Briano <ivan.briano@intel.com> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com> Signed-off-by: John Harrison <John.C.Harrison@Intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240306012759.204938-1-vinay.belgaumkar@intel.com
2024-03-06 01:27:59 +00:00
/*
* Query if kernel allows marking a context to send a Freq hint to SLPC. This
* will enable use of the strategies allowed by the SLPC algorithm.
*/
#define I915_PARAM_HAS_CONTEXT_FREQ_HINT 59
/* Must be kept compact -- no holes and well documented */
/**
* struct drm_i915_getparam - Driver parameter query structure.
*/
struct drm_i915_getparam {
/** @param: Driver parameter to query. */
__s32 param;
/**
* @value: Address of memory where queried value should be put.
*
* WARNING: Using pointers instead of fixed-size u64 means we need to write
* compat32 code. Don't repeat this mistake.
*/
int __user *value;
};
/**
* typedef drm_i915_getparam_t - Driver parameter query structure.
* See struct drm_i915_getparam.
*/
typedef struct drm_i915_getparam drm_i915_getparam_t;
/* Ioctl to set kernel params:
*/
#define I915_SETPARAM_USE_MI_BATCHBUFFER_START 1
#define I915_SETPARAM_TEX_LRU_LOG_GRANULARITY 2
#define I915_SETPARAM_ALLOW_BATCHBUFFER 3
#define I915_SETPARAM_NUM_USED_FENCES 4
/* Must be kept compact -- no holes */
typedef struct drm_i915_setparam {
int param;
int value;
} drm_i915_setparam_t;
/* A memory manager for regions of shared memory:
*/
#define I915_MEM_REGION_AGP 1
typedef struct drm_i915_mem_alloc {
int region;
int alignment;
int size;
int __user *region_offset; /* offset from start of fb or agp */
} drm_i915_mem_alloc_t;
typedef struct drm_i915_mem_free {
int region;
int region_offset;
} drm_i915_mem_free_t;
typedef struct drm_i915_mem_init_heap {
int region;
int size;
int start;
} drm_i915_mem_init_heap_t;
/* Allow memory manager to be torn down and re-initialized (eg on
* rotate):
*/
typedef struct drm_i915_mem_destroy_heap {
int region;
} drm_i915_mem_destroy_heap_t;
/* Allow X server to configure which pipes to monitor for vblank signals
*/
#define DRM_I915_VBLANK_PIPE_A 1
#define DRM_I915_VBLANK_PIPE_B 2
typedef struct drm_i915_vblank_pipe {
int pipe;
} drm_i915_vblank_pipe_t;
/* Schedule buffer swap at given vertical blank:
*/
typedef struct drm_i915_vblank_swap {
drm_drawable_t drawable;
enum drm_vblank_seq_type seqtype;
unsigned int sequence;
} drm_i915_vblank_swap_t;
typedef struct drm_i915_hws_addr {
__u64 addr;
} drm_i915_hws_addr_t;
struct drm_i915_gem_init {
/**
* Beginning offset in the GTT to be managed by the DRM memory
* manager.
*/
__u64 gtt_start;
/**
* Ending offset in the GTT to be managed by the DRM memory
* manager.
*/
__u64 gtt_end;
};
struct drm_i915_gem_create {
/**
* Requested size for the object.
*
* The (page-aligned) allocated size for the object will be returned.
*/
__u64 size;
/**
* Returned handle for the object.
*
* Object handles are nonzero.
*/
__u32 handle;
__u32 pad;
};
struct drm_i915_gem_pread {
/** Handle for the object being read. */
__u32 handle;
__u32 pad;
/** Offset into the object to read from */
__u64 offset;
/** Length of data to read */
__u64 size;
/**
* Pointer to write the data into.
*
* This is a fixed-size type for 32/64 compatibility.
*/
__u64 data_ptr;
};
struct drm_i915_gem_pwrite {
/** Handle for the object being written to. */
__u32 handle;
__u32 pad;
/** Offset into the object to write to */
__u64 offset;
/** Length of data to write */
__u64 size;
/**
* Pointer to read the data from.
*
* This is a fixed-size type for 32/64 compatibility.
*/
__u64 data_ptr;
};
struct drm_i915_gem_mmap {
/** Handle for the object being mapped. */
__u32 handle;
__u32 pad;
/** Offset in the object to map. */
__u64 offset;
/**
* Length of data to map.
*
* The value will be page-aligned.
*/
__u64 size;
/**
* Returned pointer the data was mapped at.
*
* This is a fixed-size type for 32/64 compatibility.
*/
__u64 addr_ptr;
drm/i915: Support creation of unbound wc user mappings for objects This patch provides support to create write-combining virtual mappings of GEM object. It intends to provide the same funtionality of 'mmap_gtt' interface without the constraints and contention of a limited aperture space, but requires clients handles the linear to tile conversion on their own. This is for improving the CPU write operation performance, as with such mapping, writes and reads are almost 50% faster than with mmap_gtt. Similar to the GTT mmapping, unlike the regular CPU mmapping, it avoids the cache flush after update from CPU side, when object is passed onto GPU. This type of mapping is specially useful in case of sub-region update, i.e. when only a portion of the object is to be updated. Using a CPU mmap in such cases would normally incur a clflush of the whole object, and using a GTT mmapping would likely require eviction of an active object or fence and thus stall. The write-combining CPU mmap avoids both. To ensure the cache coherency, before using this mapping, the GTT domain has been reused here. This provides the required cache flush if the object is in CPU domain or synchronization against the concurrent rendering. Although the access through an uncached mmap should automatically invalidate the cache lines, this may not be true for non-temporal write instructions and also not all pages of the object may be updated at any given point of time through this mapping. Having a call to get_pages in set_to_gtt_domain function, as added in the earlier patch 'drm/i915: Broaden application of set-domain(GTT)', would guarantee the clflush and so there will be no cachelines holding the data for the object before it is accessed through this map. The drm_i915_gem_mmap structure (for the DRM_I915_GEM_MMAP_IOCTL) has been extended with a new flags field (defaulting to 0 for existent users). In order for userspace to detect the extended ioctl, a new parameter I915_PARAM_MMAP_VERSION has been added for versioning the ioctl interface. v2: Fix error handling, invalid flag detection, renaming (ickle) v3: Rebase to latest drm-intel-nightly codebase The new mmapping is exercised by igt/gem_mmap_wc, igt/gem_concurrent_blit and igt/gem_gtt_speed. Change-Id: Ie883942f9e689525f72fe9a8d3780c3a9faa769a Signed-off-by: Akash Goel <akash.goel@intel.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-01-02 10:59:30 +00:00
/**
* Flags for extended behaviour.
*
* Added in version 2.
*/
__u64 flags;
#define I915_MMAP_WC 0x1
};
struct drm_i915_gem_mmap_gtt {
/** Handle for the object being mapped. */
__u32 handle;
__u32 pad;
/**
* Fake offset to use for subsequent mmap call
*
* This is a fixed-size type for 32/64 compatibility.
*/
__u64 offset;
};
/**
* struct drm_i915_gem_mmap_offset - Retrieve an offset so we can mmap this buffer object.
*
* This struct is passed as argument to the `DRM_IOCTL_I915_GEM_MMAP_OFFSET` ioctl,
* and is used to retrieve the fake offset to mmap an object specified by &handle.
*
* The legacy way of using `DRM_IOCTL_I915_GEM_MMAP` is removed on gen12+.
* `DRM_IOCTL_I915_GEM_MMAP_GTT` is an older supported alias to this struct, but will behave
* as setting the &extensions to 0, and &flags to `I915_MMAP_OFFSET_GTT`.
*/
struct drm_i915_gem_mmap_offset {
/** @handle: Handle for the object being mapped. */
__u32 handle;
/** @pad: Must be zero */
__u32 pad;
/**
* @offset: The fake offset to use for subsequent mmap call
*
* This is a fixed-size type for 32/64 compatibility.
*/
__u64 offset;
/**
* @flags: Flags for extended behaviour.
*
* It is mandatory that one of the `MMAP_OFFSET` types
* should be included:
*
* - `I915_MMAP_OFFSET_GTT`: Use mmap with the object bound to GTT. (Write-Combined)
* - `I915_MMAP_OFFSET_WC`: Use Write-Combined caching.
* - `I915_MMAP_OFFSET_WB`: Use Write-Back caching.
* - `I915_MMAP_OFFSET_FIXED`: Use object placement to determine caching.
*
* On devices with local memory `I915_MMAP_OFFSET_FIXED` is the only valid
* type. On devices without local memory, this caching mode is invalid.
*
* As caching mode when specifying `I915_MMAP_OFFSET_FIXED`, WC or WB will
* be used, depending on the object placement on creation. WB will be used
* when the object can only exist in system memory, WC otherwise.
*/
__u64 flags;
#define I915_MMAP_OFFSET_GTT 0
#define I915_MMAP_OFFSET_WC 1
#define I915_MMAP_OFFSET_WB 2
#define I915_MMAP_OFFSET_UC 3
#define I915_MMAP_OFFSET_FIXED 4
/**
* @extensions: Zero-terminated chain of extensions.
*
* No current extensions defined; mbz.
*/
__u64 extensions;
};
/**
* struct drm_i915_gem_set_domain - Adjust the objects write or read domain, in
* preparation for accessing the pages via some CPU domain.
*
* Specifying a new write or read domain will flush the object out of the
* previous domain(if required), before then updating the objects domain
* tracking with the new domain.
*
* Note this might involve waiting for the object first if it is still active on
* the GPU.
*
* Supported values for @read_domains and @write_domain:
*
* - I915_GEM_DOMAIN_WC: Uncached write-combined domain
* - I915_GEM_DOMAIN_CPU: CPU cache domain
* - I915_GEM_DOMAIN_GTT: Mappable aperture domain
*
* All other domains are rejected.
*
* Note that for discrete, starting from DG1, this is no longer supported, and
* is instead rejected. On such platforms the CPU domain is effectively static,
* where we also only support a single &drm_i915_gem_mmap_offset cache mode,
* which can't be set explicitly and instead depends on the object placements,
* as per the below.
*
* Implicit caching rules, starting from DG1:
*
* - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions)
* contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and
* mapped as write-combined only.
*
* - Everything else is always allocated and mapped as write-back, with the
* guarantee that everything is also coherent with the GPU.
*
* Note that this is likely to change in the future again, where we might need
* more flexibility on future devices, so making this all explicit as part of a
* new &drm_i915_gem_create_ext extension is probable.
*/
struct drm_i915_gem_set_domain {
/** @handle: Handle for the object. */
__u32 handle;
/** @read_domains: New read domains. */
__u32 read_domains;
/**
* @write_domain: New write domain.
*
* Note that having something in the write domain implies it's in the
* read domain, and only that read domain.
*/
__u32 write_domain;
};
struct drm_i915_gem_sw_finish {
/** Handle for the object */
__u32 handle;
};
struct drm_i915_gem_relocation_entry {
/**
* Handle of the buffer being pointed to by this relocation entry.
*
* It's appealing to make this be an index into the mm_validate_entry
* list to refer to the buffer, but this allows the driver to create
* a relocation list for state buffers and not re-write it per
* exec using the buffer.
*/
__u32 target_handle;
/**
* Value to be added to the offset of the target buffer to make up
* the relocation entry.
*/
__u32 delta;
/** Offset in the buffer the relocation entry will be written into */
__u64 offset;
/**
* Offset value of the target buffer that the relocation entry was last
* written as.
*
* If the buffer has the same offset as last time, we can skip syncing
* and writing the relocation. This value is written back out by
* the execbuffer ioctl when the relocation is written.
*/
__u64 presumed_offset;
/**
* Target memory domains read by this operation.
*/
__u32 read_domains;
/**
* Target memory domains written by this operation.
*
* Note that only one domain may be written by the whole
* execbuffer operation, so that where there are conflicts,
* the application will get -EINVAL back.
*/
__u32 write_domain;
};
/** @{
* Intel memory domains
*
* Most of these just align with the various caches in
* the system and are used to flush and invalidate as
* objects end up cached in different domains.
*/
/** CPU cache */
#define I915_GEM_DOMAIN_CPU 0x00000001
/** Render cache, used by 2D and 3D drawing */
#define I915_GEM_DOMAIN_RENDER 0x00000002
/** Sampler cache, used by texture engine */
#define I915_GEM_DOMAIN_SAMPLER 0x00000004
/** Command queue, used to load batch buffers */
#define I915_GEM_DOMAIN_COMMAND 0x00000008
/** Instruction cache, used by shader programs */
#define I915_GEM_DOMAIN_INSTRUCTION 0x00000010
/** Vertex address cache */
#define I915_GEM_DOMAIN_VERTEX 0x00000020
/** GTT domain - aperture and scanout */
#define I915_GEM_DOMAIN_GTT 0x00000040
/** WC domain - uncached access */
#define I915_GEM_DOMAIN_WC 0x00000080
/** @} */
struct drm_i915_gem_exec_object {
/**
* User's handle for a buffer to be bound into the GTT for this
* operation.
*/
__u32 handle;
/** Number of relocations to be performed on this buffer */
__u32 relocation_count;
/**
* Pointer to array of struct drm_i915_gem_relocation_entry containing
* the relocations to be performed in this buffer.
*/
__u64 relocs_ptr;
/** Required alignment in graphics aperture */
__u64 alignment;
/**
* Returned value of the updated offset of the object, for future
* presumed_offset writes.
*/
__u64 offset;
};
/* DRM_IOCTL_I915_GEM_EXECBUFFER was removed in Linux 5.13 */
struct drm_i915_gem_execbuffer {
/**
* List of buffers to be validated with their relocations to be
* performend on them.
*
* This is a pointer to an array of struct drm_i915_gem_validate_entry.
*
* These buffers must be listed in an order such that all relocations
* a buffer is performing refer to buffers that have already appeared
* in the validate list.
*/
__u64 buffers_ptr;
__u32 buffer_count;
/** Offset in the batchbuffer to start execution from. */
__u32 batch_start_offset;
/** Bytes used in batchbuffer from batch_start_offset */
__u32 batch_len;
__u32 DR1;
__u32 DR4;
__u32 num_cliprects;
/** This is a struct drm_clip_rect *cliprects */
__u64 cliprects_ptr;
};
struct drm_i915_gem_exec_object2 {
/**
* User's handle for a buffer to be bound into the GTT for this
* operation.
*/
__u32 handle;
/** Number of relocations to be performed on this buffer */
__u32 relocation_count;
/**
* Pointer to array of struct drm_i915_gem_relocation_entry containing
* the relocations to be performed in this buffer.
*/
__u64 relocs_ptr;
/** Required alignment in graphics aperture */
__u64 alignment;
/**
drm/i915: Add soft-pinning API for execbuffer Userspace can pass in an offset that it presumes the object is located at. The kernel will then do its utmost to fit the object into that location. The assumption is that userspace is handling its own object locations (for example along with full-ppgtt) and that the kernel will rarely have to make space for the user's requests. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris Wilson. Fixed incorrect error paths causing crash found by Michal Winiarski. (Not published externally) v3: Rebased because of trivial conflict in object_bind_to_vm. Fixed eviction to allow eviction of soft-pinned objects when another soft-pinned object used by a subsequent execbuffer overlaps reported by Michal Winiarski. (Not published externally) v4: Moved soft-pinned objects to the front of ordered_vmas so that they are pinned first after an address conflict happens to avoid repeated conflicts in rare cases (Suggested by Chris Wilson). Expanded comment on drm_i915_gem_exec_object2.offset to cover this new API. v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this capability (Kristian). Added check for multiple pinnings on eviction (Akash). Made sure buffers are not considered misplaced without the user specifying EXEC_OBJECT_SUPPORTS_48B_ADDRESS. User must assume responsibility for any addressing workarounds. Updated object2.offset field comment again to clarify NO_RELOC case (Chris). checkpatch cleanup. v6: Trivial rebase on latest drm-intel-nightly v7: Catch attempts to pin above the max virtual address size and return EINVAL (Tvrtko). Decouple EXEC_OBJECT_SUPPORTS_48B_ADDRESS and EXEC_OBJECT_PINNED flags, user must pass both flags in any attempt to pin something at an offset above 4GB (Chris, Daniel Vetter). Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Akash Goel <akash.goel@intel.com> Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com> Cc: Michal Winiarski <michal.winiarski@intel.com> Cc: Zou Nanhai <nanhai.zou@intel.com> Cc: Kristian Høgsberg <hoegsberg@gmail.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Reviewed-by: Michel Thierry <michel.thierry@intel.com> Acked-by: PDT Signed-off-by: Thomas Daniel <thomas.daniel@intel.com> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1449575707-20933-1-git-send-email-thomas.daniel@intel.com
2015-12-08 11:55:07 +00:00
* When the EXEC_OBJECT_PINNED flag is specified this is populated by
* the user with the GTT offset at which this object will be pinned.
*
drm/i915: Add soft-pinning API for execbuffer Userspace can pass in an offset that it presumes the object is located at. The kernel will then do its utmost to fit the object into that location. The assumption is that userspace is handling its own object locations (for example along with full-ppgtt) and that the kernel will rarely have to make space for the user's requests. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris Wilson. Fixed incorrect error paths causing crash found by Michal Winiarski. (Not published externally) v3: Rebased because of trivial conflict in object_bind_to_vm. Fixed eviction to allow eviction of soft-pinned objects when another soft-pinned object used by a subsequent execbuffer overlaps reported by Michal Winiarski. (Not published externally) v4: Moved soft-pinned objects to the front of ordered_vmas so that they are pinned first after an address conflict happens to avoid repeated conflicts in rare cases (Suggested by Chris Wilson). Expanded comment on drm_i915_gem_exec_object2.offset to cover this new API. v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this capability (Kristian). Added check for multiple pinnings on eviction (Akash). Made sure buffers are not considered misplaced without the user specifying EXEC_OBJECT_SUPPORTS_48B_ADDRESS. User must assume responsibility for any addressing workarounds. Updated object2.offset field comment again to clarify NO_RELOC case (Chris). checkpatch cleanup. v6: Trivial rebase on latest drm-intel-nightly v7: Catch attempts to pin above the max virtual address size and return EINVAL (Tvrtko). Decouple EXEC_OBJECT_SUPPORTS_48B_ADDRESS and EXEC_OBJECT_PINNED flags, user must pass both flags in any attempt to pin something at an offset above 4GB (Chris, Daniel Vetter). Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Akash Goel <akash.goel@intel.com> Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com> Cc: Michal Winiarski <michal.winiarski@intel.com> Cc: Zou Nanhai <nanhai.zou@intel.com> Cc: Kristian Høgsberg <hoegsberg@gmail.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Reviewed-by: Michel Thierry <michel.thierry@intel.com> Acked-by: PDT Signed-off-by: Thomas Daniel <thomas.daniel@intel.com> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1449575707-20933-1-git-send-email-thomas.daniel@intel.com
2015-12-08 11:55:07 +00:00
* When the I915_EXEC_NO_RELOC flag is specified this must contain the
* presumed_offset of the object.
*
drm/i915: Add soft-pinning API for execbuffer Userspace can pass in an offset that it presumes the object is located at. The kernel will then do its utmost to fit the object into that location. The assumption is that userspace is handling its own object locations (for example along with full-ppgtt) and that the kernel will rarely have to make space for the user's requests. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris Wilson. Fixed incorrect error paths causing crash found by Michal Winiarski. (Not published externally) v3: Rebased because of trivial conflict in object_bind_to_vm. Fixed eviction to allow eviction of soft-pinned objects when another soft-pinned object used by a subsequent execbuffer overlaps reported by Michal Winiarski. (Not published externally) v4: Moved soft-pinned objects to the front of ordered_vmas so that they are pinned first after an address conflict happens to avoid repeated conflicts in rare cases (Suggested by Chris Wilson). Expanded comment on drm_i915_gem_exec_object2.offset to cover this new API. v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this capability (Kristian). Added check for multiple pinnings on eviction (Akash). Made sure buffers are not considered misplaced without the user specifying EXEC_OBJECT_SUPPORTS_48B_ADDRESS. User must assume responsibility for any addressing workarounds. Updated object2.offset field comment again to clarify NO_RELOC case (Chris). checkpatch cleanup. v6: Trivial rebase on latest drm-intel-nightly v7: Catch attempts to pin above the max virtual address size and return EINVAL (Tvrtko). Decouple EXEC_OBJECT_SUPPORTS_48B_ADDRESS and EXEC_OBJECT_PINNED flags, user must pass both flags in any attempt to pin something at an offset above 4GB (Chris, Daniel Vetter). Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Akash Goel <akash.goel@intel.com> Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com> Cc: Michal Winiarski <michal.winiarski@intel.com> Cc: Zou Nanhai <nanhai.zou@intel.com> Cc: Kristian Høgsberg <hoegsberg@gmail.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Reviewed-by: Michel Thierry <michel.thierry@intel.com> Acked-by: PDT Signed-off-by: Thomas Daniel <thomas.daniel@intel.com> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1449575707-20933-1-git-send-email-thomas.daniel@intel.com
2015-12-08 11:55:07 +00:00
* During execbuffer2 the kernel populates it with the value of the
* current GTT offset of the object, for future presumed_offset writes.
*
* See struct drm_i915_gem_create_ext for the rules when dealing with
* alignment restrictions with I915_MEMORY_CLASS_DEVICE, on devices with
* minimum page sizes, like DG2.
*/
__u64 offset;
#define EXEC_OBJECT_NEEDS_FENCE (1<<0)
#define EXEC_OBJECT_NEEDS_GTT (1<<1)
#define EXEC_OBJECT_WRITE (1<<2)
drm/i915: Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset There are some allocations that must be only referenced by 32-bit offsets. To limit the chances of having the first 4GB already full, objects not requiring this workaround use DRM_MM_SEARCH_BELOW/ DRM_MM_CREATE_TOP flags In specific, any resource used with flat/heapless (0x00000000-0xfffff000) General State Heap (GSH) or Instruction State Heap (ISH) must be in a 32-bit range, because the General State Offset and Instruction State Offset are limited to 32-bits. Objects must have EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag to indicate if they can be allocated above the 32-bit address range. To limit the chances of having the first 4GB already full, objects will use DRM_MM_SEARCH_BELOW + DRM_MM_CREATE_TOP flags when possible. The libdrm user of the EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag is here: http://lists.freedesktop.org/archives/intel-gfx/2015-September/075836.html v2: Changed flag logic from neeeds_32b, to supports_48b. v3: Moved 48-bit support flag back to exec_object. (Chris, Daniel) v4: Split pin flags into PIN_ZONE_4G and PIN_HIGH; update PIN_OFFSET_MASK to use last PIN_ defined instead of hard-coded value; use correct limit check in eb_vma_misplaced. (Chris) v5: Don't touch PIN_OFFSET_MASK and update workaround comment (Chris) v6: Apply pin-high for ggtt too (Chris) v7: Handle simultaneous pin-high and pin-mappable end correctly (Akash) Fix check for entries currently using +4GB addresses, use min_t and other polish in object_bind_to_vm (Chris) v8: Commit message updated to point to libdrm patch. v9: vmas are allocated in the correct ozone, so only check flag when the vma has not been allocated. (Chris) Cc: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v4) Signed-off-by: Michel Thierry <michel.thierry@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-10-01 12:33:57 +00:00
#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
#define EXEC_OBJECT_PINNED (1<<4)
#define EXEC_OBJECT_PAD_TO_SIZE (1<<5)
drm/i915: Enable userspace to opt-out of implicit fencing Userspace is faced with a dilemma. The kernel requires implicit fencing to manage resource usage (we always must wait for the GPU to finish before releasing its PTE) and for third parties. However, userspace may wish to avoid this serialisation if it is either using explicit fencing between parties and wants more fine-grained access to buffers (e.g. it may partition the buffer between uses and track fences on ranges rather than the implicit fences tracking the whole object). It follows that userspace needs a mechanism to avoid the kernel's serialisation on its implicit fences before execbuf execution. The next question is whether this is an object, execbuf or context flag. Hybrid users (such as using explicit EGL_ANDROID_native_sync fencing on shared winsys buffers, but implicit fencing on internal surfaces) require a per-object level flag. Given that this flag need to be only set once for the lifetime of the object, this reduces the convenience of having an execbuf or context level flag (and avoids having multiple pieces of uABI controlling the same feature). Incorrect use of this flag will result in rendering corruption and GPU hangs - but will not result in use-after-free or similar resource tracking issues. Serious caveat: write ordering is not strictly correct after setting this flag on a render target on multiple engines. This affects all subsequent GEM operations (execbuf, set-domain, pread) and shared dma-buf operations. A fix is possible - but costly (both in terms of further ABI changes and runtime overhead). Testcase: igt/gem_exec_async Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Acked-by: Chad Versace <chadversary@chromium.org> Link: http://patchwork.freedesktop.org/patch/msgid/20170127094008.27489-1-chris@chris-wilson.co.uk
2017-01-27 09:40:07 +00:00
/* The kernel implicitly tracks GPU activity on all GEM objects, and
* synchronises operations with outstanding rendering. This includes
* rendering on other devices if exported via dma-buf. However, sometimes
* this tracking is too coarse and the user knows better. For example,
* if the object is split into non-overlapping ranges shared between different
* clients or engines (i.e. suballocating objects), the implicit tracking
* by kernel assumes that each operation affects the whole object rather
* than an individual range, causing needless synchronisation between clients.
* The kernel will also forgo any CPU cache flushes prior to rendering from
* the object as the client is expected to be also handling such domain
* tracking.
*
* The kernel maintains the implicit tracking in order to manage resources
* used by the GPU - this flag only disables the synchronisation prior to
* rendering with this object in this execbuf.
*
* Opting out of implicit synhronisation requires the user to do its own
* explicit tracking to avoid rendering corruption. See, for example,
* I915_PARAM_HAS_EXEC_FENCE to order execbufs and execute them asynchronously.
*/
#define EXEC_OBJECT_ASYNC (1<<6)
/* Request that the contents of this execobject be copied into the error
* state upon a GPU hang involving this batch for post-mortem debugging.
* These buffers are recorded in no particular order as "user" in
* /sys/class/drm/cardN/error. Query I915_PARAM_HAS_EXEC_CAPTURE to see
* if the kernel supports this flag.
*/
#define EXEC_OBJECT_CAPTURE (1<<7)
/* All remaining bits are MBZ and RESERVED FOR FUTURE USE */
#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_CAPTURE<<1)
__u64 flags;
union {
__u64 rsvd1;
__u64 pad_to_size;
};
__u64 rsvd2;
};
/**
* struct drm_i915_gem_exec_fence - An input or output fence for the execbuf
* ioctl.
*
* The request will wait for input fence to signal before submission.
*
* The returned output fence will be signaled after the completion of the
* request.
*/
struct drm_i915_gem_exec_fence {
/** @handle: User's handle for a drm_syncobj to wait on or signal. */
__u32 handle;
/**
* @flags: Supported flags are:
*
* I915_EXEC_FENCE_WAIT:
* Wait for the input fence before request submission.
*
* I915_EXEC_FENCE_SIGNAL:
* Return request completion fence as output
*/
__u32 flags;
#define I915_EXEC_FENCE_WAIT (1<<0)
#define I915_EXEC_FENCE_SIGNAL (1<<1)
#define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SIGNAL << 1))
};
/**
* struct drm_i915_gem_execbuffer_ext_timeline_fences - Timeline fences
* for execbuf ioctl.
*
* This structure describes an array of drm_syncobj and associated points for
* timeline variants of drm_syncobj. It is invalid to append this structure to
* the execbuf if I915_EXEC_FENCE_ARRAY is set.
*/
struct drm_i915_gem_execbuffer_ext_timeline_fences {
#define DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES 0
/** @base: Extension link. See struct i915_user_extension. */
struct i915_user_extension base;
/**
* @fence_count: Number of elements in the @handles_ptr & @value_ptr
* arrays.
*/
__u64 fence_count;
/**
* @handles_ptr: Pointer to an array of struct drm_i915_gem_exec_fence
* of length @fence_count.
*/
__u64 handles_ptr;
/**
* @values_ptr: Pointer to an array of u64 values of length
* @fence_count.
* Values must be 0 for a binary drm_syncobj. A Value of 0 for a
* timeline drm_syncobj is invalid as it turns a drm_syncobj into a
* binary one.
*/
__u64 values_ptr;
};
/**
* struct drm_i915_gem_execbuffer2 - Structure for DRM_I915_GEM_EXECBUFFER2
* ioctl.
*/
struct drm_i915_gem_execbuffer2 {
/** @buffers_ptr: Pointer to a list of gem_exec_object2 structs */
__u64 buffers_ptr;
/** @buffer_count: Number of elements in @buffers_ptr array */
__u32 buffer_count;
/**
* @batch_start_offset: Offset in the batchbuffer to start execution
* from.
*/
__u32 batch_start_offset;
/**
* @batch_len: Length in bytes of the batch buffer, starting from the
* @batch_start_offset. If 0, length is assumed to be the batch buffer
* object size.
*/
__u32 batch_len;
/** @DR1: deprecated */
__u32 DR1;
/** @DR4: deprecated */
__u32 DR4;
/** @num_cliprects: See @cliprects_ptr */
__u32 num_cliprects;
/**
* @cliprects_ptr: Kernel clipping was a DRI1 misfeature.
*
* It is invalid to use this field if I915_EXEC_FENCE_ARRAY or
* I915_EXEC_USE_EXTENSIONS flags are not set.
*
* If I915_EXEC_FENCE_ARRAY is set, then this is a pointer to an array
* of &drm_i915_gem_exec_fence and @num_cliprects is the length of the
* array.
*
* If I915_EXEC_USE_EXTENSIONS is set, then this is a pointer to a
* single &i915_user_extension and num_cliprects is 0.
*/
__u64 cliprects_ptr;
/** @flags: Execbuf flags */
__u64 flags;
#define I915_EXEC_RING_MASK (0x3f)
#define I915_EXEC_DEFAULT (0<<0)
#define I915_EXEC_RENDER (1<<0)
#define I915_EXEC_BSD (2<<0)
#define I915_EXEC_BLT (3<<0)
#define I915_EXEC_VEBOX (4<<0)
/* Used for switching the constants addressing mode on gen4+ RENDER ring.
* Gen6+ only supports relative addressing to dynamic state (default) and
* absolute addressing.
*
* These flags are ignored for the BSD and BLT rings.
*/
#define I915_EXEC_CONSTANTS_MASK (3<<6)
#define I915_EXEC_CONSTANTS_REL_GENERAL (0<<6) /* default */
#define I915_EXEC_CONSTANTS_ABSOLUTE (1<<6)
#define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */
/** Resets the SO write offset registers for transform feedback on gen7. */
#define I915_EXEC_GEN7_SOL_RESET (1<<8)
/** Request a privileged ("secure") batch buffer. Note only available for
* DRM_ROOT_ONLY | DRM_MASTER processes.
*/
#define I915_EXEC_SECURE (1<<9)
/** Inform the kernel that the batch is and will always be pinned. This
* negates the requirement for a workaround to be performed to avoid
* an incoherent CS (such as can be found on 830/845). If this flag is
* not passed, the kernel will endeavour to make sure the batch is
* coherent with the CS before execution. If this flag is passed,
* userspace assumes the responsibility for ensuring the same.
*/
#define I915_EXEC_IS_PINNED (1<<10)
/** Provide a hint to the kernel that the command stream and auxiliary
* state buffers already holds the correct presumed addresses and so the
* relocation process may be skipped if no buffers need to be moved in
* preparation for the execbuffer.
*/
#define I915_EXEC_NO_RELOC (1<<11)
/** Use the reloc.handle as an index into the exec object array rather
* than as the per-file handle.
*/
#define I915_EXEC_HANDLE_LUT (1<<12)
/** Used for switching BSD rings on the platforms with two BSD rings */
#define I915_EXEC_BSD_SHIFT (13)
#define I915_EXEC_BSD_MASK (3 << I915_EXEC_BSD_SHIFT)
/* default ping-pong mode */
#define I915_EXEC_BSD_DEFAULT (0 << I915_EXEC_BSD_SHIFT)
#define I915_EXEC_BSD_RING1 (1 << I915_EXEC_BSD_SHIFT)
#define I915_EXEC_BSD_RING2 (2 << I915_EXEC_BSD_SHIFT)
/** Tell the kernel that the batchbuffer is processed by
* the resource streamer.
*/
#define I915_EXEC_RESOURCE_STREAMER (1<<15)
/* Setting I915_EXEC_FENCE_IN implies that lower_32_bits(rsvd2) represent
* a sync_file fd to wait upon (in a nonblocking manner) prior to executing
* the batch.
*
* Returns -EINVAL if the sync_file fd cannot be found.
*/
#define I915_EXEC_FENCE_IN (1<<16)
/* Setting I915_EXEC_FENCE_OUT causes the ioctl to return a sync_file fd
* in the upper_32_bits(rsvd2) upon success. Ownership of the fd is given
* to the caller, and it should be close() after use. (The fd is a regular
* file descriptor and will be cleaned up on process termination. It holds
* a reference to the request, but nothing else.)
*
* The sync_file fd can be combined with other sync_file and passed either
* to execbuf using I915_EXEC_FENCE_IN, to atomic KMS ioctls (so that a flip
* will only occur after this request completes), or to other devices.
*
* Using I915_EXEC_FENCE_OUT requires use of
* DRM_IOCTL_I915_GEM_EXECBUFFER2_WR ioctl so that the result is written
* back to userspace. Failure to do so will cause the out-fence to always
* be reported as zero, and the real fence fd to be leaked.
*/
#define I915_EXEC_FENCE_OUT (1<<17)
/*
* Traditionally the execbuf ioctl has only considered the final element in
* the execobject[] to be the executable batch. Often though, the client
* will known the batch object prior to construction and being able to place
* it into the execobject[] array first can simplify the relocation tracking.
* Setting I915_EXEC_BATCH_FIRST tells execbuf to use element 0 of the
* execobject[] as the * batch instead (the default is to use the last
* element).
*/
#define I915_EXEC_BATCH_FIRST (1<<18)
/* Setting I915_FENCE_ARRAY implies that num_cliprects and cliprects_ptr
* define an array of i915_gem_exec_fence structures which specify a set of
* dma fences to wait upon or signal.
*/
#define I915_EXEC_FENCE_ARRAY (1<<19)
2019-05-21 21:11:34 +00:00
/*
* Setting I915_EXEC_FENCE_SUBMIT implies that lower_32_bits(rsvd2) represent
* a sync_file fd to wait upon (in a nonblocking manner) prior to executing
* the batch.
*
* Returns -EINVAL if the sync_file fd cannot be found.
*/
#define I915_EXEC_FENCE_SUBMIT (1 << 20)
/*
* Setting I915_EXEC_USE_EXTENSIONS implies that
* drm_i915_gem_execbuffer2.cliprects_ptr is treated as a pointer to an linked
* list of i915_user_extension. Each i915_user_extension node is the base of a
* larger structure. The list of supported structures are listed in the
* drm_i915_gem_execbuffer_ext enum.
*/
#define I915_EXEC_USE_EXTENSIONS (1 << 21)
#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_USE_EXTENSIONS << 1))
/** @rsvd1: Context id */
__u64 rsvd1;
/**
* @rsvd2: in and out sync_file file descriptors.
*
* When I915_EXEC_FENCE_IN or I915_EXEC_FENCE_SUBMIT flag is set, the
* lower 32 bits of this field will have the in sync_file fd (input).
*
* When I915_EXEC_FENCE_OUT flag is set, the upper 32 bits of this
* field will have the out sync_file fd (output).
*/
__u64 rsvd2;
};
#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
#define i915_execbuffer2_set_context_id(eb2, context) \
(eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK
#define i915_execbuffer2_get_context_id(eb2) \
((eb2).rsvd1 & I915_EXEC_CONTEXT_ID_MASK)
struct drm_i915_gem_pin {
/** Handle of the buffer to be pinned. */
__u32 handle;
__u32 pad;
/** alignment required within the aperture */
__u64 alignment;
/** Returned GTT offset of the buffer. */
__u64 offset;
};
struct drm_i915_gem_unpin {
/** Handle of the buffer to be unpinned. */
__u32 handle;
__u32 pad;
};
struct drm_i915_gem_busy {
/** Handle of the buffer to check for busy */
__u32 handle;
/** Return busy status
*
* A return of 0 implies that the object is idle (after
* having flushed any pending activity), and a non-zero return that
* the object is still in-flight on the GPU. (The GPU has not yet
* signaled completion for all pending requests that reference the
* object.) An object is guaranteed to become idle eventually (so
* long as no new GPU commands are executed upon it). Due to the
* asynchronous nature of the hardware, an object reported
* as busy may become idle before the ioctl is completed.
*
* Furthermore, if the object is busy, which engine is busy is only
* provided as a guide and only indirectly by reporting its class
* (there may be more than one engine in each class). There are race
* conditions which prevent the report of which engines are busy from
* being always accurate. However, the converse is not true. If the
* object is idle, the result of the ioctl, that all engines are idle,
* is accurate.
*
* The returned dword is split into two fields to indicate both
* the engine classes on which the object is being read, and the
* engine class on which it is currently being written (if any).
*
* The low word (bits 0:15) indicate if the object is being written
* to by any engine (there can only be one, as the GEM implicit
* synchronisation rules force writes to be serialised). Only the
* engine class (offset by 1, I915_ENGINE_CLASS_RENDER is reported as
* 1 not 0 etc) for the last write is reported.
*
* The high word (bits 16:31) are a bitmask of which engines classes
* are currently reading from the object. Multiple engines may be
* reading from the object simultaneously.
*
* The value of each engine class is the same as specified in the
* I915_CONTEXT_PARAM_ENGINES context parameter and via perf, i.e.
* I915_ENGINE_CLASS_RENDER, I915_ENGINE_CLASS_COPY, etc.
* Some hardware may have parallel execution engines, e.g. multiple
* media engines, which are mapped to the same class identifier and so
* are not separately reported for busyness.
*
* Caveat emptor:
* Only the boolean result of this query is reliable; that is whether
* the object is idle or busy. The report of which engines are busy
* should be only used as a heuristic.
*/
__u32 busy;
};
/**
* struct drm_i915_gem_caching - Set or get the caching for given object
* handle.
*
* Allow userspace to control the GTT caching bits for a given object when the
* object is later mapped through the ppGTT(or GGTT on older platforms lacking
* ppGTT support, or if the object is used for scanout). Note that this might
* require unbinding the object from the GTT first, if its current caching value
* doesn't match.
*
* Note that this all changes on discrete platforms, starting from DG1, the
* set/get caching is no longer supported, and is now rejected. Instead the CPU
* caching attributes(WB vs WC) will become an immutable creation time property
* for the object, along with the GTT caching level. For now we don't expose any
* new uAPI for this, instead on DG1 this is all implicit, although this largely
* shouldn't matter since DG1 is coherent by default(without any way of
* controlling it).
*
* Implicit caching rules, starting from DG1:
*
* - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions)
* contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and
* mapped as write-combined only.
*
* - Everything else is always allocated and mapped as write-back, with the
* guarantee that everything is also coherent with the GPU.
*
* Note that this is likely to change in the future again, where we might need
* more flexibility on future devices, so making this all explicit as part of a
* new &drm_i915_gem_create_ext extension is probable.
*
* Side note: Part of the reason for this is that changing the at-allocation-time CPU
* caching attributes for the pages might be required(and is expensive) if we
* need to then CPU map the pages later with different caching attributes. This
* inconsistent caching behaviour, while supported on x86, is not universally
* supported on other architectures. So for simplicity we opt for setting
* everything at creation time, whilst also making it immutable, on discrete
* platforms.
*/
struct drm_i915_gem_caching {
/**
* @handle: Handle of the buffer to set/get the caching level.
*/
__u32 handle;
/**
* @caching: The GTT caching level to apply or possible return value.
*
* The supported @caching values:
*
* I915_CACHING_NONE:
*
* GPU access is not coherent with CPU caches. Default for machines
* without an LLC. This means manual flushing might be needed, if we
* want GPU access to be coherent.
*
* I915_CACHING_CACHED:
*
* GPU access is coherent with CPU caches and furthermore the data is
* cached in last-level caches shared between CPU cores and the GPU GT.
*
* I915_CACHING_DISPLAY:
*
* Special GPU caching mode which is coherent with the scanout engines.
* Transparently falls back to I915_CACHING_NONE on platforms where no
* special cache mode (like write-through or gfdt flushing) is
* available. The kernel automatically sets this mode when using a
* buffer as a scanout target. Userspace can manually set this mode to
* avoid a costly stall and clflush in the hotpath of drawing the first
* frame.
*/
#define I915_CACHING_NONE 0
#define I915_CACHING_CACHED 1
#define I915_CACHING_DISPLAY 2
__u32 caching;
};
#define I915_TILING_NONE 0
#define I915_TILING_X 1
#define I915_TILING_Y 2
/*
* Do not add new tiling types here. The I915_TILING_* values are for
* de-tiling fence registers that no longer exist on modern platforms. Although
* the hardware may support new types of tiling in general (e.g., Tile4), we
* do not need to add them to the uapi that is specific to now-defunct ioctls.
*/
#define I915_TILING_LAST I915_TILING_Y
#define I915_BIT_6_SWIZZLE_NONE 0
#define I915_BIT_6_SWIZZLE_9 1
#define I915_BIT_6_SWIZZLE_9_10 2
#define I915_BIT_6_SWIZZLE_9_11 3
#define I915_BIT_6_SWIZZLE_9_10_11 4
/* Not seen by userland */
#define I915_BIT_6_SWIZZLE_UNKNOWN 5
/* Seen by userland. */
#define I915_BIT_6_SWIZZLE_9_17 6
#define I915_BIT_6_SWIZZLE_9_10_17 7
struct drm_i915_gem_set_tiling {
/** Handle of the buffer to have its tiling state updated */
__u32 handle;
/**
* Tiling mode for the object (I915_TILING_NONE, I915_TILING_X,
* I915_TILING_Y).
*
* This value is to be set on request, and will be updated by the
* kernel on successful return with the actual chosen tiling layout.
*
* The tiling mode may be demoted to I915_TILING_NONE when the system
* has bit 6 swizzling that can't be managed correctly by GEM.
*
* Buffer contents become undefined when changing tiling_mode.
*/
__u32 tiling_mode;
/**
* Stride in bytes for the object when in I915_TILING_X or
* I915_TILING_Y.
*/
__u32 stride;
/**
* Returned address bit 6 swizzling required for CPU access through
* mmap mapping.
*/
__u32 swizzle_mode;
};
struct drm_i915_gem_get_tiling {
/** Handle of the buffer to get tiling state for. */
__u32 handle;
/**
* Current tiling mode for the object (I915_TILING_NONE, I915_TILING_X,
* I915_TILING_Y).
*/
__u32 tiling_mode;
/**
* Returned address bit 6 swizzling required for CPU access through
* mmap mapping.
*/
__u32 swizzle_mode;
/**
* Returned address bit 6 swizzling required for CPU access through
* mmap mapping whilst bound.
*/
__u32 phys_swizzle_mode;
};
struct drm_i915_gem_get_aperture {
/** Total size of the aperture used by i915_gem_execbuffer, in bytes */
__u64 aper_size;
/**
* Available space in the aperture used by i915_gem_execbuffer, in
* bytes
*/
__u64 aper_available_size;
};
struct drm_i915_get_pipe_from_crtc_id {
/** ID of CRTC being requested **/
__u32 crtc_id;
/** pipe of requested CRTC **/
__u32 pipe;
};
#define I915_MADV_WILLNEED 0
#define I915_MADV_DONTNEED 1
#define __I915_MADV_PURGED 2 /* internal state */
struct drm_i915_gem_madvise {
/** Handle of the buffer to change the backing store advice */
__u32 handle;
/* Advice: either the buffer will be needed again in the near future,
* or won't be and could be discarded under memory pressure.
*/
__u32 madv;
/** Whether the backing store still exists. */
__u32 retained;
};
/* flags */
#define I915_OVERLAY_TYPE_MASK 0xff
#define I915_OVERLAY_YUV_PLANAR 0x01
#define I915_OVERLAY_YUV_PACKED 0x02
#define I915_OVERLAY_RGB 0x03
#define I915_OVERLAY_DEPTH_MASK 0xff00
#define I915_OVERLAY_RGB24 0x1000
#define I915_OVERLAY_RGB16 0x2000
#define I915_OVERLAY_RGB15 0x3000
#define I915_OVERLAY_YUV422 0x0100
#define I915_OVERLAY_YUV411 0x0200
#define I915_OVERLAY_YUV420 0x0300
#define I915_OVERLAY_YUV410 0x0400
#define I915_OVERLAY_SWAP_MASK 0xff0000
#define I915_OVERLAY_NO_SWAP 0x000000
#define I915_OVERLAY_UV_SWAP 0x010000
#define I915_OVERLAY_Y_SWAP 0x020000
#define I915_OVERLAY_Y_AND_UV_SWAP 0x030000
#define I915_OVERLAY_FLAGS_MASK 0xff000000
#define I915_OVERLAY_ENABLE 0x01000000
struct drm_intel_overlay_put_image {
/* various flags and src format description */
__u32 flags;
/* source picture description */
__u32 bo_handle;
/* stride values and offsets are in bytes, buffer relative */
__u16 stride_Y; /* stride for packed formats */
__u16 stride_UV;
__u32 offset_Y; /* offset for packet formats */
__u32 offset_U;
__u32 offset_V;
/* in pixels */
__u16 src_width;
__u16 src_height;
/* to compensate the scaling factors for partially covered surfaces */
__u16 src_scan_width;
__u16 src_scan_height;
/* output crtc description */
__u32 crtc_id;
__u16 dst_x;
__u16 dst_y;
__u16 dst_width;
__u16 dst_height;
};
/* flags */
#define I915_OVERLAY_UPDATE_ATTRS (1<<0)
#define I915_OVERLAY_UPDATE_GAMMA (1<<1)
#define I915_OVERLAY_DISABLE_DEST_COLORKEY (1<<2)
struct drm_intel_overlay_attrs {
__u32 flags;
__u32 color_key;
__s32 brightness;
__u32 contrast;
__u32 saturation;
__u32 gamma0;
__u32 gamma1;
__u32 gamma2;
__u32 gamma3;
__u32 gamma4;
__u32 gamma5;
};
/*
* Intel sprite handling
*
* Color keying works with a min/mask/max tuple. Both source and destination
* color keying is allowed.
*
* Source keying:
* Sprite pixels within the min & max values, masked against the color channels
* specified in the mask field, will be transparent. All other pixels will
* be displayed on top of the primary plane. For RGB surfaces, only the min
* and mask fields will be used; ranged compares are not allowed.
*
* Destination keying:
* Primary plane pixels that match the min value, masked against the color
* channels specified in the mask field, will be replaced by corresponding
* pixels from the sprite plane.
*
* Note that source & destination keying are exclusive; only one can be
* active on a given plane.
*/
#define I915_SET_COLORKEY_NONE (1<<0) /* Deprecated. Instead set
* flags==0 to disable colorkeying.
*/
#define I915_SET_COLORKEY_DESTINATION (1<<1)
#define I915_SET_COLORKEY_SOURCE (1<<2)
struct drm_intel_sprite_colorkey {
__u32 plane_id;
__u32 min_value;
__u32 channel_mask;
__u32 max_value;
__u32 flags;
};
struct drm_i915_gem_wait {
/** Handle of BO we shall wait on */
__u32 bo_handle;
__u32 flags;
/** Number of nanoseconds to wait, Returns time remaining. */
__s64 timeout_ns;
};
struct drm_i915_gem_context_create {
__u32 ctx_id; /* output: id of new context*/
__u32 pad;
};
/**
* struct drm_i915_gem_context_create_ext - Structure for creating contexts.
*/
struct drm_i915_gem_context_create_ext {
/** @ctx_id: Id of the created context (output) */
__u32 ctx_id;
/**
* @flags: Supported flags are:
*
* I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS:
*
* Extensions may be appended to this structure and driver must check
* for those. See @extensions.
*
* I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE
*
* Created context will have single timeline.
*/
drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl By exporting the ability to map user address and inserting PTEs representing their backing pages into the GTT, we can exploit UMA in order to utilize normal application data as a texture source or even as a render target (depending upon the capabilities of the chipset). This has a number of uses, with zero-copy downloads to the GPU and efficient readback making the intermixed streaming of CPU and GPU operations fairly efficient. This ability has many widespread implications from faster rendering of client-side software rasterisers (chromium), mitigation of stalls due to read back (firefox) and to faster pipelining of texture data (such as pixel buffer objects in GL or data blobs in CL). v2: Compile with CONFIG_MMU_NOTIFIER v3: We can sleep while performing invalidate-range, which we can utilise to drop our page references prior to the kernel manipulating the vma (for either discard or cloning) and so protect normal users. v4: Only run the invalidate notifier if the range intercepts the bo. v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers v6: Recheck after reacquire mutex for lost mmu. v7: Fix implicit padding of ioctl struct by rounding to next 64bit boundary. v8: Fix rebasing error after forwarding porting the back port. v9: Limit the userptr to page aligned entries. We now expect userspace to handle all the offset-in-page adjustments itself. v10: Prevent vma from being copied across fork to avoid issues with cow. v11: Drop vma behaviour changes -- locking is nigh on impossible. Use a worker to load user pages to avoid lock inversions. v12: Use get_task_mm()/mmput() for correct refcounting of mm. v13: Use a worker to release the mmu_notifier to avoid lock inversion v14: Decouple mmu_notifier from struct_mutex using a custom mmu_notifer with its own locking and tree of objects for each mm/mmu_notifier. v15: Prevent overlapping userptr objects, and invalidate all objects within the mmu_notifier range v16: Fix a typo for iterating over multiple objects in the range and rearrange error path to destroy the mmu_notifier locklessly. Also close a race between invalidate_range and the get_pages_worker. v17: Close a race between get_pages_worker/invalidate_range and fresh allocations of the same userptr range - and notice that struct_mutex was presumed to be held when during creation it wasn't. v18: Sigh. Fix the refactor of st_set_pages() to allocate enough memory for the struct sg_table and to clear it before reporting an error. v19: Always error out on read-only userptr requests as we don't have the hardware infrastructure to support them at the moment. v20: Refuse to implement read-only support until we have the required infrastructure - but reserve the bit in flags for future use. v21: use_mm() is not required for get_user_pages(). It is only meant to be used to fix up the kernel thread's current->mm for use with copy_user(). v22: Use sg_alloc_table_from_pages for that chunky feeling v23: Export a function for sanity checking dma-buf rather than encode userptr details elsewhere, and clean up comments based on suggestions by Bradley. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com> Cc: Akash Goel <akash.goel@intel.com> Cc: "Volkin, Bradley D" <bradley.d.volkin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Reviewed-by: Brad Volkin <bradley.d.volkin@intel.com> [danvet: Frob ioctl allocation to pick the next one - will cause a bit of fuss with create2 apparently, but such are the rules.] [danvet2: oops, forgot to git add after manual patch application] [danvet3: Appease sparse.] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-05-16 13:22:37 +00:00
__u32 flags;
#define I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS (1u << 0)
#define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE (1u << 1)
#define I915_CONTEXT_CREATE_FLAGS_UNKNOWN \
(-(I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE << 1))
/**
* @extensions: Zero-terminated chain of extensions.
*
* I915_CONTEXT_CREATE_EXT_SETPARAM:
* Context parameter to set or query during context creation.
* See struct drm_i915_gem_context_create_ext_setparam.
*
* I915_CONTEXT_CREATE_EXT_CLONE:
* This extension has been removed. On the off chance someone somewhere
* has attempted to use it, never re-use this extension number.
*/
__u64 extensions;
#define I915_CONTEXT_CREATE_EXT_SETPARAM 0
#define I915_CONTEXT_CREATE_EXT_CLONE 1
drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl By exporting the ability to map user address and inserting PTEs representing their backing pages into the GTT, we can exploit UMA in order to utilize normal application data as a texture source or even as a render target (depending upon the capabilities of the chipset). This has a number of uses, with zero-copy downloads to the GPU and efficient readback making the intermixed streaming of CPU and GPU operations fairly efficient. This ability has many widespread implications from faster rendering of client-side software rasterisers (chromium), mitigation of stalls due to read back (firefox) and to faster pipelining of texture data (such as pixel buffer objects in GL or data blobs in CL). v2: Compile with CONFIG_MMU_NOTIFIER v3: We can sleep while performing invalidate-range, which we can utilise to drop our page references prior to the kernel manipulating the vma (for either discard or cloning) and so protect normal users. v4: Only run the invalidate notifier if the range intercepts the bo. v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers v6: Recheck after reacquire mutex for lost mmu. v7: Fix implicit padding of ioctl struct by rounding to next 64bit boundary. v8: Fix rebasing error after forwarding porting the back port. v9: Limit the userptr to page aligned entries. We now expect userspace to handle all the offset-in-page adjustments itself. v10: Prevent vma from being copied across fork to avoid issues with cow. v11: Drop vma behaviour changes -- locking is nigh on impossible. Use a worker to load user pages to avoid lock inversions. v12: Use get_task_mm()/mmput() for correct refcounting of mm. v13: Use a worker to release the mmu_notifier to avoid lock inversion v14: Decouple mmu_notifier from struct_mutex using a custom mmu_notifer with its own locking and tree of objects for each mm/mmu_notifier. v15: Prevent overlapping userptr objects, and invalidate all objects within the mmu_notifier range v16: Fix a typo for iterating over multiple objects in the range and rearrange error path to destroy the mmu_notifier locklessly. Also close a race between invalidate_range and the get_pages_worker. v17: Close a race between get_pages_worker/invalidate_range and fresh allocations of the same userptr range - and notice that struct_mutex was presumed to be held when during creation it wasn't. v18: Sigh. Fix the refactor of st_set_pages() to allocate enough memory for the struct sg_table and to clear it before reporting an error. v19: Always error out on read-only userptr requests as we don't have the hardware infrastructure to support them at the moment. v20: Refuse to implement read-only support until we have the required infrastructure - but reserve the bit in flags for future use. v21: use_mm() is not required for get_user_pages(). It is only meant to be used to fix up the kernel thread's current->mm for use with copy_user(). v22: Use sg_alloc_table_from_pages for that chunky feeling v23: Export a function for sanity checking dma-buf rather than encode userptr details elsewhere, and clean up comments based on suggestions by Bradley. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com> Cc: Akash Goel <akash.goel@intel.com> Cc: "Volkin, Bradley D" <bradley.d.volkin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Reviewed-by: Brad Volkin <bradley.d.volkin@intel.com> [danvet: Frob ioctl allocation to pick the next one - will cause a bit of fuss with create2 apparently, but such are the rules.] [danvet2: oops, forgot to git add after manual patch application] [danvet3: Appease sparse.] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-05-16 13:22:37 +00:00
};
/**
* struct drm_i915_gem_context_param - Context parameter to set or query.
*/
struct drm_i915_gem_context_param {
/** @ctx_id: Context id */
__u32 ctx_id;
/** @size: Size of the parameter @value */
__u32 size;
/** @param: Parameter to set or query */
__u64 param;
#define I915_CONTEXT_PARAM_BAN_PERIOD 0x1
/* I915_CONTEXT_PARAM_NO_ZEROMAP has been removed. On the off chance
* someone somewhere has attempted to use it, never re-use this context
* param number.
*/
#define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2
#define I915_CONTEXT_PARAM_GTT_SIZE 0x3
#define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4
#define I915_CONTEXT_PARAM_BANNABLE 0x5
drm/i915/scheduler: Support user-defined priorities Use a priority stored in the context as the initial value when submitting a request. This allows us to change the default priority on a per-context basis, allowing different contexts to be favoured with GPU time at the expense of lower importance work. The user can adjust the context's priority via I915_CONTEXT_PARAM_PRIORITY, with more positive values being higher priority (they will be serviced earlier, after their dependencies have been resolved). Any prerequisite work for an execbuf will have its priority raised to match the new request as required. Normal users can specify any value in the range of -1023 to 0 [default], i.e. they can reduce the priority of their workloads (and temporarily boost it back to normal if so desired). Privileged users can specify any value in the range of -1023 to 1023, [default is 0], i.e. they can raise their priority above all overs and so potentially starve the system. Note that the existing schedulers are not fair, nor load balancing, the execution is strictly by priority on a first-come, first-served basis, and the driver may choose to boost some requests above the range available to users. This priority was originally based around nice(2), but evolved to allow clients to adjust their priority within a small range, and allow for a privileged high priority range. For example, this can be used to implement EGL_IMG_context_priority https://www.khronos.org/registry/egl/extensions/IMG/EGL_IMG_context_priority.txt EGL_CONTEXT_PRIORITY_LEVEL_IMG determines the priority level of the context to be created. This attribute is a hint, as an implementation may not support multiple contexts at some priority levels and system policy may limit access to high priority contexts to appropriate system privilege level. The default value for EGL_CONTEXT_PRIORITY_LEVEL_IMG is EGL_CONTEXT_PRIORITY_MEDIUM_IMG." so we can map PRIORITY_HIGH -> 1023 [privileged, will failback to 0] PRIORITY_MED -> 0 [default] PRIORITY_LOW -> -1023 They also map onto the priorities used by VkQueue (and a VkQueue is essentially a timeline, our i915_gem_context under full-ppgtt). v2: s/CAP_SYS_ADMIN/CAP_SYS_NICE/ v3: Report min/max user priorities as defines in the uapi, and rebase internal priorities on the exposed values. Testcase: igt/gem_exec_schedule Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20171003203453.15692-9-chris@chris-wilson.co.uk
2017-10-03 20:34:53 +00:00
#define I915_CONTEXT_PARAM_PRIORITY 0x6
#define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */
#define I915_CONTEXT_DEFAULT_PRIORITY 0
#define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */
drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only) We want to allow userspace to reconfigure the subslice configuration on a per context basis. This is required for the functional requirement of shutting down non-VME enabled sub-slices on Gen11 parts. To do so, we expose a context parameter to allow adjustment of the RPCS register stored within the context image (and currently not accessible via LRI). If the context is adjusted before first use or whilst idle, the adjustment is for "free"; otherwise if the context is active we queue a request to do so (using the kernel context), following all other activity by that context, which is also marked as barrier for all following submission against the same context. Since the overhead of device re-configuration during context switching can be significant, especially in multi-context workloads, we limit this new uAPI to only support the Gen11 VME use case. In this use case either the device is fully enabled, and exactly one slice and half of the subslices are enabled. Example usage: struct drm_i915_gem_context_param_sseu sseu = { }; struct drm_i915_gem_context_param arg = { .param = I915_CONTEXT_PARAM_SSEU, .ctx_id = gem_context_create(fd), .size = sizeof(sseu), .value = to_user_pointer(&sseu) }; /* Query device defaults. */ gem_context_get_param(fd, &arg); /* Set VME configuration on a 1x6x8 part. */ sseu.slice_mask = 0x1; sseu.subslice_mask = 0xe0; gem_context_set_param(fd, &arg); v2: Fix offset of CTX_R_PWR_CLK_STATE in intel_lr_context_set_sseu() (Lionel) v3: Add ability to program this per engine (Chris) v4: Move most get_sseu() into i915_gem_context.c (Lionel) v5: Validate sseu configuration against the device's capabilities (Lionel) v6: Change context powergating settings through MI_SDM on kernel context (Chris) v7: Synchronize the requests following a powergating setting change using a global dependency (Chris) Iterate timelines through dev_priv.gt.active_rings (Tvrtko) Disable RPCS configuration setting for non capable users (Lionel/Tvrtko) v8: s/union intel_sseu/struct intel_sseu/ (Lionel) s/dev_priv/i915/ (Tvrtko) Change uapi class/instance fields to u16 (Tvrtko) Bump mask fields to 64bits (Lionel) Don't return EPERM when dynamic sseu is disabled (Tvrtko) v9: Import context image into kernel context's ppgtt only when reconfiguring powergated slice/subslices (Chris) Use aliasing ppgtt when needed (Michel) Tvrtko Ursulin: v10: * Update for upstream changes. * Request submit needs a RPM reference. * Reject on !FULL_PPGTT for simplicity. * Pull out get/set param to helpers for readability and less indent. * Use i915_request_await_dma_fence in add_global_barrier to skip waits on the same timeline and avoid GEM_BUG_ON. * No need to explicitly assign a NULL pointer to engine in legacy mode. * No need to move gen8_make_rpcs up. * Factored out global barrier as prep patch. * Allow to only CAP_SYS_ADMIN if !Gen11. v11: * Remove engine vfunc in favour of local helper. (Chris Wilson) * Stop retiring requests before updates since it is not needed (Chris Wilson) * Implement direct CPU update path for idle contexts. (Chris Wilson) * Left side dependency needs only be on the same context timeline. (Chris Wilson) * It is sufficient to order the timeline. (Chris Wilson) * Reject !RCS configuration attempts with -ENODEV for now. v12: * Rebase for make_rpcs. v13: * Centralize SSEU normalization to make_rpcs. * Type width checking (uAPI <-> implementation). * Gen11 restrictions uAPI checks. * Gen11 subslice count differences handling. Chris Wilson: * args->size handling fixes. * Update context image from GGTT. * Postpone context image update to pinning. * Use i915_gem_active_raw instead of last_request_on_engine. v14: * Add activity tracker on intel_context to fix the lifetime issues and simplify the code. (Chris Wilson) v15: * Fix context pin leak if no space in ring by simplifying the context pinning sequence. v16: * Rebase for context get/set param locking changes. * Just -ENODEV on !Gen11. (Joonas) v17: * Fix one Gen11 subslice enablement rule. * Handle error from i915_sw_fence_await_sw_fence_gfp. (Chris Wilson) v18: * Update commit message. (Joonas) * Restrict uAPI to VME use case. (Joonas) v19: * Rebase. v20: * Rebase for ce->active_tracker. v21: * Rebase for IS_GEN changes. v22: * Reserve uAPI for flags straight away. (Chris Wilson) v23: * Rebase for RUNTIME_INFO. v24: * Added some headline docs for the uapi usage. (Joonas/Chris) v25: * Renamed class/instance to engine_class/engine_instance to avoid clash with C++ keyword. (Tony Ye) v26: * Rebased for runtime pm api changes. v27: * Rebased for intel_context_init. * Wrap commit msg to 75. v28: (Chris Wilson) * Use i915_gem_ggtt. * Use i915_request_await_dma_fence to show a better example. v29: * i915_timeline_set_barrier can now fail. (Chris Wilson) v30: * Capture some acks. v31: * Drop the WARN_ON from use controllable paths. (Chris Wilson) * Use overflows_type for all checks. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100899 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107634 Issue: https://github.com/intel/media-driver/issues/267 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Zhipeng Gong <zhipeng.gong@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Tony Ye <tony.ye@intel.com> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Acked-by: Timo Aaltonen <timo.aaltonen@canonical.com> Acked-by: Takashi Iwai <tiwai@suse.de> Acked-by: Stéphane Marchesin <marcheu@chromium.org> Link: https://patchwork.freedesktop.org/patch/msgid/20190205095032.22673-4-tvrtko.ursulin@linux.intel.com
2019-02-05 09:50:31 +00:00
/*
* When using the following param, value should be a pointer to
* drm_i915_gem_context_param_sseu.
*/
#define I915_CONTEXT_PARAM_SSEU 0x7
/*
* Not all clients may want to attempt automatic recover of a context after
* a hang (for example, some clients may only submit very small incremental
* batches relying on known logical state of previous batches which will never
* recover correctly and each attempt will hang), and so would prefer that
* the context is forever banned instead.
*
* If set to false (0), after a reset, subsequent (and in flight) rendering
* from this context is discarded, and the client will need to create a new
* context to use instead.
*
* If set to true (1), the kernel will automatically attempt to recover the
* context by skipping the hanging batch and executing the next batch starting
* from the default context state (discarding the incomplete logical context
* state lost due to the reset).
*
* On creation, all new contexts are marked as recoverable.
*/
#define I915_CONTEXT_PARAM_RECOVERABLE 0x8
/*
* The id of the associated virtual memory address space (ppGTT) of
* this context. Can be retrieved and passed to another context
* (on the same fd) for both to use the same ppGTT and so share
* address layouts, and avoid reloading the page tables on context
* switches between themselves.
*
* See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY.
*/
#define I915_CONTEXT_PARAM_VM 0x9
drm/i915: Allow a context to define its set of engines Over the last few years, we have debated how to extend the user API to support an increase in the number of engines, that may be sparse and even be heterogeneous within a class (not all video decoders created equal). We settled on using (class, instance) tuples to identify a specific engine, with an API for the user to construct a map of engines to capabilities. Into this picture, we then add a challenge of virtual engines; one user engine that maps behind the scenes to any number of physical engines. To keep it general, we want the user to have full control over that mapping. To that end, we allow the user to constrain a context to define the set of engines that it can access, order fully controlled by the user via (class, instance). With such precise control in context setup, we can continue to use the existing execbuf uABI of specifying a single index; only now it doesn't automagically map onto the engines, it uses the user defined engine map from the context. v2: Fixup freeing of local on success of get_engines() v3: Allow empty engines[] v4: s/nengine/num_engines/ v5: Replace 64 limit on num_engines with a note that execbuf is currently limited to only using the first 64 engines. v6: Actually use the engines_mutex to guard the ctx->engines. Testcase: igt/gem_ctx_engines Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190521211134.16117-2-chris@chris-wilson.co.uk
2019-05-21 21:11:26 +00:00
/*
* I915_CONTEXT_PARAM_ENGINES:
*
* Bind this context to operate on this subset of available engines. Henceforth,
* the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as
* an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0]
* and upwards. Slots 0...N are filled in using the specified (class, instance).
* Use
* engine_class: I915_ENGINE_CLASS_INVALID,
* engine_instance: I915_ENGINE_CLASS_INVALID_NONE
* to specify a gap in the array that can be filled in later, e.g. by a
* virtual engine used for load balancing.
*
* Setting the number of engines bound to the context to 0, by passing a zero
* sized argument, will revert back to default settings.
*
* See struct i915_context_param_engines.
*
* Extensions:
* i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE)
* i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
* i915_context_engines_parallel_submit (I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT)
drm/i915: Allow a context to define its set of engines Over the last few years, we have debated how to extend the user API to support an increase in the number of engines, that may be sparse and even be heterogeneous within a class (not all video decoders created equal). We settled on using (class, instance) tuples to identify a specific engine, with an API for the user to construct a map of engines to capabilities. Into this picture, we then add a challenge of virtual engines; one user engine that maps behind the scenes to any number of physical engines. To keep it general, we want the user to have full control over that mapping. To that end, we allow the user to constrain a context to define the set of engines that it can access, order fully controlled by the user via (class, instance). With such precise control in context setup, we can continue to use the existing execbuf uABI of specifying a single index; only now it doesn't automagically map onto the engines, it uses the user defined engine map from the context. v2: Fixup freeing of local on success of get_engines() v3: Allow empty engines[] v4: s/nengine/num_engines/ v5: Replace 64 limit on num_engines with a note that execbuf is currently limited to only using the first 64 engines. v6: Actually use the engines_mutex to guard the ctx->engines. Testcase: igt/gem_ctx_engines Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190521211134.16117-2-chris@chris-wilson.co.uk
2019-05-21 21:11:26 +00:00
*/
#define I915_CONTEXT_PARAM_ENGINES 0xa
drm/i915/gem: Make context persistence optional Our existing behaviour is to allow contexts and their GPU requests to persist past the point of closure until the requests are complete. This allows clients to operate in a 'fire-and-forget' manner where they can setup a rendering pipeline and hand it over to the display server and immediately exit. As the rendering pipeline is kept alive until completion, the display server (or other consumer) can use the results in the future and present them to the user. The compute model is a little different. They have little to no buffer sharing between processes as their kernels tend to operate on a continuous stream, feeding the results back to the client application. These kernels operate for an indeterminate length of time, with many clients wishing that the kernel was always running for as long as they keep feeding in the data, i.e. acting like a DSP. Not all clients want this persistent "desktop" behaviour and would prefer that the contexts are cleaned up immediately upon closure. This ensures that when clients are run without hangchecking (e.g. for compute kernels of indeterminate runtime), any GPU hang or other unexpected workloads are terminated with the process and does not continue to hog resources. The default behaviour for new contexts is the legacy persistence mode, as some desktop applications are dependent upon the existing behaviour. New clients will have to opt in to immediate cleanup on context closure. If the hangchecking modparam is disabled, so is persistent context support -- all contexts will be terminated on closure. We expect this behaviour change to be welcomed by compute users, who have often been caught between a rock and a hard place. They disable hangchecking to avoid their kernels being "unfairly" declared hung, but have also experienced true hangs that the system was then unable to clean up. Naturally, this leads to bug reports. Testcase: igt/gem_ctx_persistence Link: https://github.com/intel/compute-runtime/pull/228 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Michał Winiarski <michal.winiarski@intel.com> Cc: Jon Bloomfield <jon.bloomfield@intel.com> Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Acked-by: Jason Ekstrand <jason@jlekstrand.net> Link: https://patchwork.freedesktop.org/patch/msgid/20191029202338.8841-1-chris@chris-wilson.co.uk
2019-10-29 20:23:38 +00:00
/*
* I915_CONTEXT_PARAM_PERSISTENCE:
*
* Allow the context and active rendering to survive the process until
* completion. Persistence allows fire-and-forget clients to queue up a
* bunch of work, hand the output over to a display server and then quit.
* If the context is marked as not persistent, upon closing (either via
* an explicit DRM_I915_GEM_CONTEXT_DESTROY or implicitly from file closure
* or process termination), the context and any outstanding requests will be
* cancelled (and exported fences for cancelled requests marked as -EIO).
*
* By default, new contexts allow persistence.
*/
#define I915_CONTEXT_PARAM_PERSISTENCE 0xb
drm/i915: Drop I915_CONTEXT_PARAM_RINGSIZE This reverts commit 88be76cdafc7 ("drm/i915: Allow userspace to specify ringsize on construction"). This API was originally added for OpenCL but the compute-runtime PR has sat open for a year without action so we can still pull it out if we want. I argue we should drop it for three reasons: 1. If the compute-runtime PR has sat open for a year, this clearly isn't that important. 2. It's a very leaky API. Ring size is an implementation detail of the current execlist scheduler and really only makes sense there. It can't apply to the older ring-buffer scheduler on pre-execlist hardware because that's shared across all contexts and it won't apply to the GuC scheduler that's in the pipeline. 3. Having userspace set a ring size in bytes is a bad solution to the problem of having too small a ring. There is no way that userspace has the information to know how to properly set the ring size so it's just going to detect the feature and always set it to the maximum of 512K. This is what the compute-runtime PR does. The scheduler in i915, on the other hand, does have the information to make an informed choice. It could detect if the ring size is a problem and grow it itself. Or, if that's too hard, we could just increase the default size from 16K to 32K or even 64K instead of relying on userspace to do it. Let's drop this API for now and, if someone decides they really care about solving this problem, they can do it properly. Signed-off-by: Jason Ekstrand <jason@jlekstrand.net> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-2-jason@jlekstrand.net
2021-07-08 15:48:06 +00:00
/* This API has been removed. On the off chance someone somewhere has
* attempted to use it, never re-use this context param number.
*/
#define I915_CONTEXT_PARAM_RINGSIZE 0xc
drm/i915/pxp: interfaces for using protected objects This api allow user mode to create protected buffers and to mark contexts as making use of such objects. Only when using contexts marked in such a way is the execution guaranteed to work as expected. Contexts can only be marked as using protected content at creation time (i.e. the parameter is immutable) and they must be both bannable and not recoverable. Given that the protected session gets invalidated on suspend, contexts created this way hold a runtime pm wakeref until they're either destroyed or invalidated. All protected objects and contexts will be considered invalid when the PXP session is destroyed and all new submissions using them will be rejected. All intel contexts within the invalidated gem contexts will be marked banned. Userspace can detect that an invalidation has occurred via the RESET_STATS ioctl, where we report it the same way as a ban due to a hang. v5: squash patches, rebase on proto_ctx, update kerneldoc v6: rebase on obj create_ext changes v7: Use session counter to check if an object it valid, hold wakeref in context, don't add a new flag to RESET_STATS (Daniel) v8: don't increase guilty count for contexts banned during pxp invalidation (Rodrigo) v9: better comments, avoid wakeref put race between pxp_inval and context_close, add usage examples (Rodrigo) v10: modify internal set/get-protected-context functions to not return -ENODEV when setting PXP param to false or getting param when running on pxp-unsupported hw or getting param when i915 was built with CONFIG_PXP off Signed-off-by: Alan Previn <alan.previn.teres.alexis@intel.com> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Signed-off-by: Bommu Krishnaiah <krishnaiah.bommu@intel.com> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: Jason Ekstrand <jason@jlekstrand.net> Cc: Daniel Vetter <daniel.vetter@intel.com> Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20210924191452.1539378-11-alan.previn.teres.alexis@intel.com
2021-09-24 19:14:45 +00:00
/*
* I915_CONTEXT_PARAM_PROTECTED_CONTENT:
*
* Mark that the context makes use of protected content, which will result
* in the context being invalidated when the protected content session is.
* Given that the protected content session is killed on suspend, the device
* is kept awake for the lifetime of a protected context, so the user should
* make sure to dispose of them once done.
* This flag can only be set at context creation time and, when set to true,
* must be preceded by an explicit setting of I915_CONTEXT_PARAM_RECOVERABLE
* to false. This flag can't be set to true in conjunction with setting the
* I915_CONTEXT_PARAM_BANNABLE flag to false. Creation example:
*
* .. code-block:: C
*
* struct drm_i915_gem_context_create_ext_setparam p_protected = {
* .base = {
* .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
* },
* .param = {
* .param = I915_CONTEXT_PARAM_PROTECTED_CONTENT,
* .value = 1,
* }
* };
* struct drm_i915_gem_context_create_ext_setparam p_norecover = {
* .base = {
* .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
* .next_extension = to_user_pointer(&p_protected),
* },
* .param = {
* .param = I915_CONTEXT_PARAM_RECOVERABLE,
* .value = 0,
* }
* };
* struct drm_i915_gem_context_create_ext create = {
* .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
* .extensions = to_user_pointer(&p_norecover);
* };
*
* ctx_id = gem_context_create_ext(drm_fd, &create);
*
* In addition to the normal failure cases, setting this flag during context
* creation can result in the following errors:
*
* -ENODEV: feature not available
* -EPERM: trying to mark a recoverable or not bannable context as protected
drm/i915/pxp: Add ARB session creation and cleanup Add MTL's function for ARB session creation using PXP firmware version 4.3 ABI structure format. While relooking at the ARB session creation flow in intel_pxp_start, let's address missing UAPI documentation. Without actually changing backward compatible behavior, update i915's drm-uapi comments that describe the possible error values when creating a context with I915_CONTEXT_PARAM_PROTECTED_CONTENT: Since the first merge of PXP support on ADL, i915 returns -ENXIO if a dependency such as firmware or component driver was yet to be loaded or returns -EIO if the creation attempt failed when requested by the PXP firmware (specific firmware error responses are reported in dmesg). Add MTL's function for ARB session invalidation but this reuses PXP firmware version 4.2 ABI structure format. For both cases, in the back-end gsccs functions for sending messages to the firmware inspect the GSC-CS-Mem-Header's pending-bit which means the GSC firmware is busy and we should retry. Given the last hw requirement, lets also update functions in front-end layer that wait for session creation or teardown completion to use new worst case timeout periods. Signed-off-by: Alan Previn <alan.previn.teres.alexis@intel.com> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Signed-off-by: Radhakrishna Sripada <radhakrishna.sripada@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20230511231738.1077674-6-alan.previn.teres.alexis@intel.com
2023-05-11 23:17:35 +00:00
* -ENXIO: A dependency such as a component driver or firmware is not yet
* loaded so user space may need to attempt again. Depending on the
* device, this error may be reported if protected context creation is
* attempted very early after kernel start because the internal timeout
* waiting for such dependencies is not guaranteed to be larger than
* required (numbers differ depending on system and kernel config):
* - ADL/RPL: dependencies may take up to 3 seconds from kernel start
* while context creation internal timeout is 250 milisecs
* - MTL: dependencies may take up to 8 seconds from kernel start
* while context creation internal timeout is 250 milisecs
* NOTE: such dependencies happen once, so a subsequent call to create a
* protected context after a prior successful call will not experience
* such timeouts and will not return -ENXIO (unless the driver is reloaded,
* or, depending on the device, resumes from a suspended state).
* -EIO: The firmware did not succeed in creating the protected context.
drm/i915/pxp: interfaces for using protected objects This api allow user mode to create protected buffers and to mark contexts as making use of such objects. Only when using contexts marked in such a way is the execution guaranteed to work as expected. Contexts can only be marked as using protected content at creation time (i.e. the parameter is immutable) and they must be both bannable and not recoverable. Given that the protected session gets invalidated on suspend, contexts created this way hold a runtime pm wakeref until they're either destroyed or invalidated. All protected objects and contexts will be considered invalid when the PXP session is destroyed and all new submissions using them will be rejected. All intel contexts within the invalidated gem contexts will be marked banned. Userspace can detect that an invalidation has occurred via the RESET_STATS ioctl, where we report it the same way as a ban due to a hang. v5: squash patches, rebase on proto_ctx, update kerneldoc v6: rebase on obj create_ext changes v7: Use session counter to check if an object it valid, hold wakeref in context, don't add a new flag to RESET_STATS (Daniel) v8: don't increase guilty count for contexts banned during pxp invalidation (Rodrigo) v9: better comments, avoid wakeref put race between pxp_inval and context_close, add usage examples (Rodrigo) v10: modify internal set/get-protected-context functions to not return -ENODEV when setting PXP param to false or getting param when running on pxp-unsupported hw or getting param when i915 was built with CONFIG_PXP off Signed-off-by: Alan Previn <alan.previn.teres.alexis@intel.com> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Signed-off-by: Bommu Krishnaiah <krishnaiah.bommu@intel.com> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: Jason Ekstrand <jason@jlekstrand.net> Cc: Daniel Vetter <daniel.vetter@intel.com> Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20210924191452.1539378-11-alan.previn.teres.alexis@intel.com
2021-09-24 19:14:45 +00:00
*/
#define I915_CONTEXT_PARAM_PROTECTED_CONTENT 0xd
drm/i915/guc: Use context hints for GT frequency Allow user to provide a low latency context hint. When set, KMD sends a hint to GuC which results in special handling for this context. SLPC will ramp the GT frequency aggressively every time it switches to this context. The down freq threshold will also be lower so GuC will ramp down the GT freq for this context more slowly. We also disable waitboost for this context as that will interfere with the strategy. We need to enable the use of SLPC Compute strategy during init, but it will apply only to contexts that set this bit during context creation. Userland can check whether this feature is supported using a new param- I915_PARAM_HAS_CONTEXT_FREQ_HINT. This flag is true for all guc submission enabled platforms as they use SLPC for frequency management. The Mesa usage model for this flag is here - https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint v2: Rename flags as per review suggestions (Rodrigo, Tvrtko). Also, use flag bits in intel_context as it allows finer control for toggling per engine if needed (Tvrtko). v3: Minor review comments (Tvrtko) v4: Update comment (Sushma) Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Cc: Sushma Venkatesh Reddy <sushma.venkatesh.reddy@intel.com> Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com> Acked-by: Ivan Briano <ivan.briano@intel.com> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com> Signed-off-by: John Harrison <John.C.Harrison@Intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240306012759.204938-1-vinay.belgaumkar@intel.com
2024-03-06 01:27:59 +00:00
/*
* I915_CONTEXT_PARAM_LOW_LATENCY:
*
* Mark this context as a low latency workload which requires aggressive GT
* frequency scaling. Use I915_PARAM_HAS_CONTEXT_FREQ_HINT to check if the kernel
* supports this per context flag.
*/
#define I915_CONTEXT_PARAM_LOW_LATENCY 0xe
/* Must be kept compact -- no holes and well documented */
/** @value: Context parameter value to be set or queried */
__u64 value;
};
/*
drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only) We want to allow userspace to reconfigure the subslice configuration on a per context basis. This is required for the functional requirement of shutting down non-VME enabled sub-slices on Gen11 parts. To do so, we expose a context parameter to allow adjustment of the RPCS register stored within the context image (and currently not accessible via LRI). If the context is adjusted before first use or whilst idle, the adjustment is for "free"; otherwise if the context is active we queue a request to do so (using the kernel context), following all other activity by that context, which is also marked as barrier for all following submission against the same context. Since the overhead of device re-configuration during context switching can be significant, especially in multi-context workloads, we limit this new uAPI to only support the Gen11 VME use case. In this use case either the device is fully enabled, and exactly one slice and half of the subslices are enabled. Example usage: struct drm_i915_gem_context_param_sseu sseu = { }; struct drm_i915_gem_context_param arg = { .param = I915_CONTEXT_PARAM_SSEU, .ctx_id = gem_context_create(fd), .size = sizeof(sseu), .value = to_user_pointer(&sseu) }; /* Query device defaults. */ gem_context_get_param(fd, &arg); /* Set VME configuration on a 1x6x8 part. */ sseu.slice_mask = 0x1; sseu.subslice_mask = 0xe0; gem_context_set_param(fd, &arg); v2: Fix offset of CTX_R_PWR_CLK_STATE in intel_lr_context_set_sseu() (Lionel) v3: Add ability to program this per engine (Chris) v4: Move most get_sseu() into i915_gem_context.c (Lionel) v5: Validate sseu configuration against the device's capabilities (Lionel) v6: Change context powergating settings through MI_SDM on kernel context (Chris) v7: Synchronize the requests following a powergating setting change using a global dependency (Chris) Iterate timelines through dev_priv.gt.active_rings (Tvrtko) Disable RPCS configuration setting for non capable users (Lionel/Tvrtko) v8: s/union intel_sseu/struct intel_sseu/ (Lionel) s/dev_priv/i915/ (Tvrtko) Change uapi class/instance fields to u16 (Tvrtko) Bump mask fields to 64bits (Lionel) Don't return EPERM when dynamic sseu is disabled (Tvrtko) v9: Import context image into kernel context's ppgtt only when reconfiguring powergated slice/subslices (Chris) Use aliasing ppgtt when needed (Michel) Tvrtko Ursulin: v10: * Update for upstream changes. * Request submit needs a RPM reference. * Reject on !FULL_PPGTT for simplicity. * Pull out get/set param to helpers for readability and less indent. * Use i915_request_await_dma_fence in add_global_barrier to skip waits on the same timeline and avoid GEM_BUG_ON. * No need to explicitly assign a NULL pointer to engine in legacy mode. * No need to move gen8_make_rpcs up. * Factored out global barrier as prep patch. * Allow to only CAP_SYS_ADMIN if !Gen11. v11: * Remove engine vfunc in favour of local helper. (Chris Wilson) * Stop retiring requests before updates since it is not needed (Chris Wilson) * Implement direct CPU update path for idle contexts. (Chris Wilson) * Left side dependency needs only be on the same context timeline. (Chris Wilson) * It is sufficient to order the timeline. (Chris Wilson) * Reject !RCS configuration attempts with -ENODEV for now. v12: * Rebase for make_rpcs. v13: * Centralize SSEU normalization to make_rpcs. * Type width checking (uAPI <-> implementation). * Gen11 restrictions uAPI checks. * Gen11 subslice count differences handling. Chris Wilson: * args->size handling fixes. * Update context image from GGTT. * Postpone context image update to pinning. * Use i915_gem_active_raw instead of last_request_on_engine. v14: * Add activity tracker on intel_context to fix the lifetime issues and simplify the code. (Chris Wilson) v15: * Fix context pin leak if no space in ring by simplifying the context pinning sequence. v16: * Rebase for context get/set param locking changes. * Just -ENODEV on !Gen11. (Joonas) v17: * Fix one Gen11 subslice enablement rule. * Handle error from i915_sw_fence_await_sw_fence_gfp. (Chris Wilson) v18: * Update commit message. (Joonas) * Restrict uAPI to VME use case. (Joonas) v19: * Rebase. v20: * Rebase for ce->active_tracker. v21: * Rebase for IS_GEN changes. v22: * Reserve uAPI for flags straight away. (Chris Wilson) v23: * Rebase for RUNTIME_INFO. v24: * Added some headline docs for the uapi usage. (Joonas/Chris) v25: * Renamed class/instance to engine_class/engine_instance to avoid clash with C++ keyword. (Tony Ye) v26: * Rebased for runtime pm api changes. v27: * Rebased for intel_context_init. * Wrap commit msg to 75. v28: (Chris Wilson) * Use i915_gem_ggtt. * Use i915_request_await_dma_fence to show a better example. v29: * i915_timeline_set_barrier can now fail. (Chris Wilson) v30: * Capture some acks. v31: * Drop the WARN_ON from use controllable paths. (Chris Wilson) * Use overflows_type for all checks. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100899 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107634 Issue: https://github.com/intel/media-driver/issues/267 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Zhipeng Gong <zhipeng.gong@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Tony Ye <tony.ye@intel.com> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Acked-by: Timo Aaltonen <timo.aaltonen@canonical.com> Acked-by: Takashi Iwai <tiwai@suse.de> Acked-by: Stéphane Marchesin <marcheu@chromium.org> Link: https://patchwork.freedesktop.org/patch/msgid/20190205095032.22673-4-tvrtko.ursulin@linux.intel.com
2019-02-05 09:50:31 +00:00
* Context SSEU programming
*
* It may be necessary for either functional or performance reason to configure
* a context to run with a reduced number of SSEU (where SSEU stands for Slice/
* Sub-slice/EU).
*
* This is done by configuring SSEU configuration using the below
* @struct drm_i915_gem_context_param_sseu for every supported engine which
* userspace intends to use.
*
* Not all GPUs or engines support this functionality in which case an error
* code -ENODEV will be returned.
*
* Also, flexibility of possible SSEU configuration permutations varies between
* GPU generations and software imposed limitations. Requesting such a
* combination will return an error code of -EINVAL.
*
* NOTE: When perf/OA is active the context's SSEU configuration is ignored in
* favour of a single global setting.
*/
struct drm_i915_gem_context_param_sseu {
/*
* Engine class & instance to be configured or queried.
*/
struct i915_engine_class_instance engine;
drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only) We want to allow userspace to reconfigure the subslice configuration on a per context basis. This is required for the functional requirement of shutting down non-VME enabled sub-slices on Gen11 parts. To do so, we expose a context parameter to allow adjustment of the RPCS register stored within the context image (and currently not accessible via LRI). If the context is adjusted before first use or whilst idle, the adjustment is for "free"; otherwise if the context is active we queue a request to do so (using the kernel context), following all other activity by that context, which is also marked as barrier for all following submission against the same context. Since the overhead of device re-configuration during context switching can be significant, especially in multi-context workloads, we limit this new uAPI to only support the Gen11 VME use case. In this use case either the device is fully enabled, and exactly one slice and half of the subslices are enabled. Example usage: struct drm_i915_gem_context_param_sseu sseu = { }; struct drm_i915_gem_context_param arg = { .param = I915_CONTEXT_PARAM_SSEU, .ctx_id = gem_context_create(fd), .size = sizeof(sseu), .value = to_user_pointer(&sseu) }; /* Query device defaults. */ gem_context_get_param(fd, &arg); /* Set VME configuration on a 1x6x8 part. */ sseu.slice_mask = 0x1; sseu.subslice_mask = 0xe0; gem_context_set_param(fd, &arg); v2: Fix offset of CTX_R_PWR_CLK_STATE in intel_lr_context_set_sseu() (Lionel) v3: Add ability to program this per engine (Chris) v4: Move most get_sseu() into i915_gem_context.c (Lionel) v5: Validate sseu configuration against the device's capabilities (Lionel) v6: Change context powergating settings through MI_SDM on kernel context (Chris) v7: Synchronize the requests following a powergating setting change using a global dependency (Chris) Iterate timelines through dev_priv.gt.active_rings (Tvrtko) Disable RPCS configuration setting for non capable users (Lionel/Tvrtko) v8: s/union intel_sseu/struct intel_sseu/ (Lionel) s/dev_priv/i915/ (Tvrtko) Change uapi class/instance fields to u16 (Tvrtko) Bump mask fields to 64bits (Lionel) Don't return EPERM when dynamic sseu is disabled (Tvrtko) v9: Import context image into kernel context's ppgtt only when reconfiguring powergated slice/subslices (Chris) Use aliasing ppgtt when needed (Michel) Tvrtko Ursulin: v10: * Update for upstream changes. * Request submit needs a RPM reference. * Reject on !FULL_PPGTT for simplicity. * Pull out get/set param to helpers for readability and less indent. * Use i915_request_await_dma_fence in add_global_barrier to skip waits on the same timeline and avoid GEM_BUG_ON. * No need to explicitly assign a NULL pointer to engine in legacy mode. * No need to move gen8_make_rpcs up. * Factored out global barrier as prep patch. * Allow to only CAP_SYS_ADMIN if !Gen11. v11: * Remove engine vfunc in favour of local helper. (Chris Wilson) * Stop retiring requests before updates since it is not needed (Chris Wilson) * Implement direct CPU update path for idle contexts. (Chris Wilson) * Left side dependency needs only be on the same context timeline. (Chris Wilson) * It is sufficient to order the timeline. (Chris Wilson) * Reject !RCS configuration attempts with -ENODEV for now. v12: * Rebase for make_rpcs. v13: * Centralize SSEU normalization to make_rpcs. * Type width checking (uAPI <-> implementation). * Gen11 restrictions uAPI checks. * Gen11 subslice count differences handling. Chris Wilson: * args->size handling fixes. * Update context image from GGTT. * Postpone context image update to pinning. * Use i915_gem_active_raw instead of last_request_on_engine. v14: * Add activity tracker on intel_context to fix the lifetime issues and simplify the code. (Chris Wilson) v15: * Fix context pin leak if no space in ring by simplifying the context pinning sequence. v16: * Rebase for context get/set param locking changes. * Just -ENODEV on !Gen11. (Joonas) v17: * Fix one Gen11 subslice enablement rule. * Handle error from i915_sw_fence_await_sw_fence_gfp. (Chris Wilson) v18: * Update commit message. (Joonas) * Restrict uAPI to VME use case. (Joonas) v19: * Rebase. v20: * Rebase for ce->active_tracker. v21: * Rebase for IS_GEN changes. v22: * Reserve uAPI for flags straight away. (Chris Wilson) v23: * Rebase for RUNTIME_INFO. v24: * Added some headline docs for the uapi usage. (Joonas/Chris) v25: * Renamed class/instance to engine_class/engine_instance to avoid clash with C++ keyword. (Tony Ye) v26: * Rebased for runtime pm api changes. v27: * Rebased for intel_context_init. * Wrap commit msg to 75. v28: (Chris Wilson) * Use i915_gem_ggtt. * Use i915_request_await_dma_fence to show a better example. v29: * i915_timeline_set_barrier can now fail. (Chris Wilson) v30: * Capture some acks. v31: * Drop the WARN_ON from use controllable paths. (Chris Wilson) * Use overflows_type for all checks. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100899 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107634 Issue: https://github.com/intel/media-driver/issues/267 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Zhipeng Gong <zhipeng.gong@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Tony Ye <tony.ye@intel.com> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Acked-by: Timo Aaltonen <timo.aaltonen@canonical.com> Acked-by: Takashi Iwai <tiwai@suse.de> Acked-by: Stéphane Marchesin <marcheu@chromium.org> Link: https://patchwork.freedesktop.org/patch/msgid/20190205095032.22673-4-tvrtko.ursulin@linux.intel.com
2019-02-05 09:50:31 +00:00
/*
* Unknown flags must be cleared to zero.
drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only) We want to allow userspace to reconfigure the subslice configuration on a per context basis. This is required for the functional requirement of shutting down non-VME enabled sub-slices on Gen11 parts. To do so, we expose a context parameter to allow adjustment of the RPCS register stored within the context image (and currently not accessible via LRI). If the context is adjusted before first use or whilst idle, the adjustment is for "free"; otherwise if the context is active we queue a request to do so (using the kernel context), following all other activity by that context, which is also marked as barrier for all following submission against the same context. Since the overhead of device re-configuration during context switching can be significant, especially in multi-context workloads, we limit this new uAPI to only support the Gen11 VME use case. In this use case either the device is fully enabled, and exactly one slice and half of the subslices are enabled. Example usage: struct drm_i915_gem_context_param_sseu sseu = { }; struct drm_i915_gem_context_param arg = { .param = I915_CONTEXT_PARAM_SSEU, .ctx_id = gem_context_create(fd), .size = sizeof(sseu), .value = to_user_pointer(&sseu) }; /* Query device defaults. */ gem_context_get_param(fd, &arg); /* Set VME configuration on a 1x6x8 part. */ sseu.slice_mask = 0x1; sseu.subslice_mask = 0xe0; gem_context_set_param(fd, &arg); v2: Fix offset of CTX_R_PWR_CLK_STATE in intel_lr_context_set_sseu() (Lionel) v3: Add ability to program this per engine (Chris) v4: Move most get_sseu() into i915_gem_context.c (Lionel) v5: Validate sseu configuration against the device's capabilities (Lionel) v6: Change context powergating settings through MI_SDM on kernel context (Chris) v7: Synchronize the requests following a powergating setting change using a global dependency (Chris) Iterate timelines through dev_priv.gt.active_rings (Tvrtko) Disable RPCS configuration setting for non capable users (Lionel/Tvrtko) v8: s/union intel_sseu/struct intel_sseu/ (Lionel) s/dev_priv/i915/ (Tvrtko) Change uapi class/instance fields to u16 (Tvrtko) Bump mask fields to 64bits (Lionel) Don't return EPERM when dynamic sseu is disabled (Tvrtko) v9: Import context image into kernel context's ppgtt only when reconfiguring powergated slice/subslices (Chris) Use aliasing ppgtt when needed (Michel) Tvrtko Ursulin: v10: * Update for upstream changes. * Request submit needs a RPM reference. * Reject on !FULL_PPGTT for simplicity. * Pull out get/set param to helpers for readability and less indent. * Use i915_request_await_dma_fence in add_global_barrier to skip waits on the same timeline and avoid GEM_BUG_ON. * No need to explicitly assign a NULL pointer to engine in legacy mode. * No need to move gen8_make_rpcs up. * Factored out global barrier as prep patch. * Allow to only CAP_SYS_ADMIN if !Gen11. v11: * Remove engine vfunc in favour of local helper. (Chris Wilson) * Stop retiring requests before updates since it is not needed (Chris Wilson) * Implement direct CPU update path for idle contexts. (Chris Wilson) * Left side dependency needs only be on the same context timeline. (Chris Wilson) * It is sufficient to order the timeline. (Chris Wilson) * Reject !RCS configuration attempts with -ENODEV for now. v12: * Rebase for make_rpcs. v13: * Centralize SSEU normalization to make_rpcs. * Type width checking (uAPI <-> implementation). * Gen11 restrictions uAPI checks. * Gen11 subslice count differences handling. Chris Wilson: * args->size handling fixes. * Update context image from GGTT. * Postpone context image update to pinning. * Use i915_gem_active_raw instead of last_request_on_engine. v14: * Add activity tracker on intel_context to fix the lifetime issues and simplify the code. (Chris Wilson) v15: * Fix context pin leak if no space in ring by simplifying the context pinning sequence. v16: * Rebase for context get/set param locking changes. * Just -ENODEV on !Gen11. (Joonas) v17: * Fix one Gen11 subslice enablement rule. * Handle error from i915_sw_fence_await_sw_fence_gfp. (Chris Wilson) v18: * Update commit message. (Joonas) * Restrict uAPI to VME use case. (Joonas) v19: * Rebase. v20: * Rebase for ce->active_tracker. v21: * Rebase for IS_GEN changes. v22: * Reserve uAPI for flags straight away. (Chris Wilson) v23: * Rebase for RUNTIME_INFO. v24: * Added some headline docs for the uapi usage. (Joonas/Chris) v25: * Renamed class/instance to engine_class/engine_instance to avoid clash with C++ keyword. (Tony Ye) v26: * Rebased for runtime pm api changes. v27: * Rebased for intel_context_init. * Wrap commit msg to 75. v28: (Chris Wilson) * Use i915_gem_ggtt. * Use i915_request_await_dma_fence to show a better example. v29: * i915_timeline_set_barrier can now fail. (Chris Wilson) v30: * Capture some acks. v31: * Drop the WARN_ON from use controllable paths. (Chris Wilson) * Use overflows_type for all checks. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100899 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107634 Issue: https://github.com/intel/media-driver/issues/267 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Zhipeng Gong <zhipeng.gong@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Tony Ye <tony.ye@intel.com> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Acked-by: Timo Aaltonen <timo.aaltonen@canonical.com> Acked-by: Takashi Iwai <tiwai@suse.de> Acked-by: Stéphane Marchesin <marcheu@chromium.org> Link: https://patchwork.freedesktop.org/patch/msgid/20190205095032.22673-4-tvrtko.ursulin@linux.intel.com
2019-02-05 09:50:31 +00:00
*/
__u32 flags;
#define I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX (1u << 0)
drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only) We want to allow userspace to reconfigure the subslice configuration on a per context basis. This is required for the functional requirement of shutting down non-VME enabled sub-slices on Gen11 parts. To do so, we expose a context parameter to allow adjustment of the RPCS register stored within the context image (and currently not accessible via LRI). If the context is adjusted before first use or whilst idle, the adjustment is for "free"; otherwise if the context is active we queue a request to do so (using the kernel context), following all other activity by that context, which is also marked as barrier for all following submission against the same context. Since the overhead of device re-configuration during context switching can be significant, especially in multi-context workloads, we limit this new uAPI to only support the Gen11 VME use case. In this use case either the device is fully enabled, and exactly one slice and half of the subslices are enabled. Example usage: struct drm_i915_gem_context_param_sseu sseu = { }; struct drm_i915_gem_context_param arg = { .param = I915_CONTEXT_PARAM_SSEU, .ctx_id = gem_context_create(fd), .size = sizeof(sseu), .value = to_user_pointer(&sseu) }; /* Query device defaults. */ gem_context_get_param(fd, &arg); /* Set VME configuration on a 1x6x8 part. */ sseu.slice_mask = 0x1; sseu.subslice_mask = 0xe0; gem_context_set_param(fd, &arg); v2: Fix offset of CTX_R_PWR_CLK_STATE in intel_lr_context_set_sseu() (Lionel) v3: Add ability to program this per engine (Chris) v4: Move most get_sseu() into i915_gem_context.c (Lionel) v5: Validate sseu configuration against the device's capabilities (Lionel) v6: Change context powergating settings through MI_SDM on kernel context (Chris) v7: Synchronize the requests following a powergating setting change using a global dependency (Chris) Iterate timelines through dev_priv.gt.active_rings (Tvrtko) Disable RPCS configuration setting for non capable users (Lionel/Tvrtko) v8: s/union intel_sseu/struct intel_sseu/ (Lionel) s/dev_priv/i915/ (Tvrtko) Change uapi class/instance fields to u16 (Tvrtko) Bump mask fields to 64bits (Lionel) Don't return EPERM when dynamic sseu is disabled (Tvrtko) v9: Import context image into kernel context's ppgtt only when reconfiguring powergated slice/subslices (Chris) Use aliasing ppgtt when needed (Michel) Tvrtko Ursulin: v10: * Update for upstream changes. * Request submit needs a RPM reference. * Reject on !FULL_PPGTT for simplicity. * Pull out get/set param to helpers for readability and less indent. * Use i915_request_await_dma_fence in add_global_barrier to skip waits on the same timeline and avoid GEM_BUG_ON. * No need to explicitly assign a NULL pointer to engine in legacy mode. * No need to move gen8_make_rpcs up. * Factored out global barrier as prep patch. * Allow to only CAP_SYS_ADMIN if !Gen11. v11: * Remove engine vfunc in favour of local helper. (Chris Wilson) * Stop retiring requests before updates since it is not needed (Chris Wilson) * Implement direct CPU update path for idle contexts. (Chris Wilson) * Left side dependency needs only be on the same context timeline. (Chris Wilson) * It is sufficient to order the timeline. (Chris Wilson) * Reject !RCS configuration attempts with -ENODEV for now. v12: * Rebase for make_rpcs. v13: * Centralize SSEU normalization to make_rpcs. * Type width checking (uAPI <-> implementation). * Gen11 restrictions uAPI checks. * Gen11 subslice count differences handling. Chris Wilson: * args->size handling fixes. * Update context image from GGTT. * Postpone context image update to pinning. * Use i915_gem_active_raw instead of last_request_on_engine. v14: * Add activity tracker on intel_context to fix the lifetime issues and simplify the code. (Chris Wilson) v15: * Fix context pin leak if no space in ring by simplifying the context pinning sequence. v16: * Rebase for context get/set param locking changes. * Just -ENODEV on !Gen11. (Joonas) v17: * Fix one Gen11 subslice enablement rule. * Handle error from i915_sw_fence_await_sw_fence_gfp. (Chris Wilson) v18: * Update commit message. (Joonas) * Restrict uAPI to VME use case. (Joonas) v19: * Rebase. v20: * Rebase for ce->active_tracker. v21: * Rebase for IS_GEN changes. v22: * Reserve uAPI for flags straight away. (Chris Wilson) v23: * Rebase for RUNTIME_INFO. v24: * Added some headline docs for the uapi usage. (Joonas/Chris) v25: * Renamed class/instance to engine_class/engine_instance to avoid clash with C++ keyword. (Tony Ye) v26: * Rebased for runtime pm api changes. v27: * Rebased for intel_context_init. * Wrap commit msg to 75. v28: (Chris Wilson) * Use i915_gem_ggtt. * Use i915_request_await_dma_fence to show a better example. v29: * i915_timeline_set_barrier can now fail. (Chris Wilson) v30: * Capture some acks. v31: * Drop the WARN_ON from use controllable paths. (Chris Wilson) * Use overflows_type for all checks. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100899 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107634 Issue: https://github.com/intel/media-driver/issues/267 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Zhipeng Gong <zhipeng.gong@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Tony Ye <tony.ye@intel.com> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Acked-by: Timo Aaltonen <timo.aaltonen@canonical.com> Acked-by: Takashi Iwai <tiwai@suse.de> Acked-by: Stéphane Marchesin <marcheu@chromium.org> Link: https://patchwork.freedesktop.org/patch/msgid/20190205095032.22673-4-tvrtko.ursulin@linux.intel.com
2019-02-05 09:50:31 +00:00
/*
* Mask of slices to enable for the context. Valid values are a subset
* of the bitmask value returned for I915_PARAM_SLICE_MASK.
*/
__u64 slice_mask;
/*
* Mask of subslices to enable for the context. Valid values are a
* subset of the bitmask value return by I915_PARAM_SUBSLICE_MASK.
*/
__u64 subslice_mask;
/*
* Minimum/Maximum number of EUs to enable per subslice for the
* context. min_eus_per_subslice must be inferior or equal to
* max_eus_per_subslice.
*/
__u16 min_eus_per_subslice;
__u16 max_eus_per_subslice;
/*
* Unused for now. Must be cleared to zero.
*/
__u32 rsvd;
};
/**
* DOC: Virtual Engine uAPI
*
* Virtual engine is a concept where userspace is able to configure a set of
* physical engines, submit a batch buffer, and let the driver execute it on any
* engine from the set as it sees fit.
*
* This is primarily useful on parts which have multiple instances of a same
* class engine, like for example GT3+ Skylake parts with their two VCS engines.
*
* For instance userspace can enumerate all engines of a certain class using the
* previously described `Engine Discovery uAPI`_. After that userspace can
* create a GEM context with a placeholder slot for the virtual engine (using
* `I915_ENGINE_CLASS_INVALID` and `I915_ENGINE_CLASS_INVALID_NONE` for class
* and instance respectively) and finally using the
* `I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE` extension place a virtual engine in
* the same reserved slot.
*
* Example of creating a virtual engine and submitting a batch buffer to it:
*
* .. code-block:: C
*
* I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(virtual, 2) = {
* .base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE,
* .engine_index = 0, // Place this virtual engine into engine map slot 0
* .num_siblings = 2,
* .engines = { { I915_ENGINE_CLASS_VIDEO, 0 },
* { I915_ENGINE_CLASS_VIDEO, 1 }, },
* };
* I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1) = {
* .engines = { { I915_ENGINE_CLASS_INVALID,
* I915_ENGINE_CLASS_INVALID_NONE } },
* .extensions = to_user_pointer(&virtual), // Chains after load_balance extension
* };
* struct drm_i915_gem_context_create_ext_setparam p_engines = {
* .base = {
* .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
* },
* .param = {
* .param = I915_CONTEXT_PARAM_ENGINES,
* .value = to_user_pointer(&engines),
* .size = sizeof(engines),
* },
* };
* struct drm_i915_gem_context_create_ext create = {
* .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
* .extensions = to_user_pointer(&p_engines);
* };
*
* ctx_id = gem_context_create_ext(drm_fd, &create);
*
* // Now we have created a GEM context with its engine map containing a
* // single virtual engine. Submissions to this slot can go either to
* // vcs0 or vcs1, depending on the load balancing algorithm used inside
* // the driver. The load balancing is dynamic from one batch buffer to
* // another and transparent to userspace.
*
* ...
* execbuf.rsvd1 = ctx_id;
* execbuf.flags = 0; // Submits to index 0 which is the virtual engine
* gem_execbuf(drm_fd, &execbuf);
*/
drm/i915: Load balancing across a virtual engine Having allowed the user to define a set of engines that they will want to only use, we go one step further and allow them to bind those engines into a single virtual instance. Submitting a batch to the virtual engine will then forward it to any one of the set in a manner as best to distribute load. The virtual engine has a single timeline across all engines (it operates as a single queue), so it is not able to concurrently run batches across multiple engines by itself; that is left up to the user to submit multiple concurrent batches to multiple queues. Multiple users will be load balanced across the system. The mechanism used for load balancing in this patch is a late greedy balancer. When a request is ready for execution, it is added to each engine's queue, and when an engine is ready for its next request it claims it from the virtual engine. The first engine to do so, wins, i.e. the request is executed at the earliest opportunity (idle moment) in the system. As not all HW is created equal, the user is still able to skip the virtual engine and execute the batch on a specific engine, all within the same queue. It will then be executed in order on the correct engine, with execution on other virtual engines being moved away due to the load detection. A couple of areas for potential improvement left! - The virtual engine always take priority over equal-priority tasks. Mostly broken up by applying FQ_CODEL rules for prioritising new clients, and hopefully the virtual and real engines are not then congested (i.e. all work is via virtual engines, or all work is to the real engine). - We require the breadcrumb irq around every virtual engine request. For normal engines, we eliminate the need for the slow round trip via interrupt by using the submit fence and queueing in order. For virtual engines, we have to allow any job to transfer to a new ring, and cannot coalesce the submissions, so require the completion fence instead, forcing the persistent use of interrupts. - We only drip feed single requests through each virtual engine and onto the physical engines, even if there was enough work to fill all ELSP, leaving small stalls with an idle CS event at the end of every request. Could we be greedy and fill both slots? Being lazy is virtuous for load distribution on less-than-full workloads though. Other areas of improvement are more general, such as reducing lock contention, reducing dispatch overhead, looking at direct submission rather than bouncing around tasklets etc. sseu: Lift the restriction to allow sseu to be reconfigured on virtual engines composed of RENDER_CLASS (rcs). v2: macroize check_user_mbz() v3: Cancel virtual engines on wedging v4: Commence commenting v5: Replace 64b sibling_mask with a list of class:instance v6: Drop the one-element array in the uabi v7: Assert it is an virtual engine in to_virtual_engine() v8: Skip over holes in [class][inst] so we can selftest with (vcs0, vcs2) Link: https://github.com/intel/media-driver/pull/283 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190521211134.16117-6-chris@chris-wilson.co.uk
2019-05-21 21:11:30 +00:00
/*
* i915_context_engines_load_balance:
*
* Enable load balancing across this set of engines.
*
* Into the I915_EXEC_DEFAULT slot [0], a virtual engine is created that when
* used will proxy the execbuffer request onto one of the set of engines
* in such a way as to distribute the load evenly across the set.
*
* The set of engines must be compatible (e.g. the same HW class) as they
* will share the same logical GPU context and ring.
*
* To intermix rendering with the virtual engine and direct rendering onto
* the backing engines (bypassing the load balancing proxy), the context must
* be defined to use a single timeline for all engines.
*/
struct i915_context_engines_load_balance {
struct i915_user_extension base;
__u16 engine_index;
__u16 num_siblings;
__u32 flags; /* all undefined flags must be zero */
__u64 mbz64; /* reserved for future use; must be zero */
treewide: uapi: Replace zero-length arrays with flexible-array members There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. This code was transformed with the help of Coccinelle: (linux-5.19-rc2$ spatch --jobs $(getconf _NPROCESSORS_ONLN) --sp-file script.cocci --include-headers --dir . > output.patch) @@ identifier S, member, array; type T1, T2; @@ struct S { ... T1 member; T2 array[ - 0 ]; }; -fstrict-flex-arrays=3 is coming and we need to land these changes to prevent issues like these in the short future: ../fs/minix/dir.c:337:3: warning: 'strcpy' will always overflow; destination buffer has size 0, but the source string has length 2 (including NUL byte) [-Wfortify-source] strcpy(de3->name, "."); ^ Since these are all [0] to [] changes, the risk to UAPI is nearly zero. If this breaks anything, we can use a union with a new member name. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.16/process/deprecated.html#zero-length-and-one-element-arrays Link: https://github.com/KSPP/linux/issues/78 Build-tested-by: kernel test robot <lkp@intel.com> Link: https://lore.kernel.org/lkml/62b675ec.wKX6AOZ6cbE71vtF%25lkp@intel.com/ Acked-by: Dan Williams <dan.j.williams@intel.com> # For ndctl.h Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
2022-04-07 00:36:51 +00:00
struct i915_engine_class_instance engines[];
drm/i915: Load balancing across a virtual engine Having allowed the user to define a set of engines that they will want to only use, we go one step further and allow them to bind those engines into a single virtual instance. Submitting a batch to the virtual engine will then forward it to any one of the set in a manner as best to distribute load. The virtual engine has a single timeline across all engines (it operates as a single queue), so it is not able to concurrently run batches across multiple engines by itself; that is left up to the user to submit multiple concurrent batches to multiple queues. Multiple users will be load balanced across the system. The mechanism used for load balancing in this patch is a late greedy balancer. When a request is ready for execution, it is added to each engine's queue, and when an engine is ready for its next request it claims it from the virtual engine. The first engine to do so, wins, i.e. the request is executed at the earliest opportunity (idle moment) in the system. As not all HW is created equal, the user is still able to skip the virtual engine and execute the batch on a specific engine, all within the same queue. It will then be executed in order on the correct engine, with execution on other virtual engines being moved away due to the load detection. A couple of areas for potential improvement left! - The virtual engine always take priority over equal-priority tasks. Mostly broken up by applying FQ_CODEL rules for prioritising new clients, and hopefully the virtual and real engines are not then congested (i.e. all work is via virtual engines, or all work is to the real engine). - We require the breadcrumb irq around every virtual engine request. For normal engines, we eliminate the need for the slow round trip via interrupt by using the submit fence and queueing in order. For virtual engines, we have to allow any job to transfer to a new ring, and cannot coalesce the submissions, so require the completion fence instead, forcing the persistent use of interrupts. - We only drip feed single requests through each virtual engine and onto the physical engines, even if there was enough work to fill all ELSP, leaving small stalls with an idle CS event at the end of every request. Could we be greedy and fill both slots? Being lazy is virtuous for load distribution on less-than-full workloads though. Other areas of improvement are more general, such as reducing lock contention, reducing dispatch overhead, looking at direct submission rather than bouncing around tasklets etc. sseu: Lift the restriction to allow sseu to be reconfigured on virtual engines composed of RENDER_CLASS (rcs). v2: macroize check_user_mbz() v3: Cancel virtual engines on wedging v4: Commence commenting v5: Replace 64b sibling_mask with a list of class:instance v6: Drop the one-element array in the uabi v7: Assert it is an virtual engine in to_virtual_engine() v8: Skip over holes in [class][inst] so we can selftest with (vcs0, vcs2) Link: https://github.com/intel/media-driver/pull/283 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190521211134.16117-6-chris@chris-wilson.co.uk
2019-05-21 21:11:30 +00:00
} __attribute__((packed));
#define I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(name__, N__) struct { \
struct i915_user_extension base; \
__u16 engine_index; \
__u16 num_siblings; \
__u32 flags; \
__u64 mbz64; \
struct i915_engine_class_instance engines[N__]; \
} __attribute__((packed)) name__
/*
* i915_context_engines_bond:
*
* Constructed bonded pairs for execution within a virtual engine.
*
* All engines are equal, but some are more equal than others. Given
* the distribution of resources in the HW, it may be preferable to run
* a request on a given subset of engines in parallel to a request on a
* specific engine. We enable this selection of engines within a virtual
* engine by specifying bonding pairs, for any given master engine we will
* only execute on one of the corresponding siblings within the virtual engine.
*
* To execute a request in parallel on the master engine and a sibling requires
* coordination with a I915_EXEC_FENCE_SUBMIT.
*/
struct i915_context_engines_bond {
struct i915_user_extension base;
struct i915_engine_class_instance master;
__u16 virtual_index; /* index of virtual engine in ctx->engines[] */
__u16 num_bonds;
__u64 flags; /* all undefined flags must be zero */
__u64 mbz64[4]; /* reserved for future use; must be zero */
treewide: uapi: Replace zero-length arrays with flexible-array members There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. This code was transformed with the help of Coccinelle: (linux-5.19-rc2$ spatch --jobs $(getconf _NPROCESSORS_ONLN) --sp-file script.cocci --include-headers --dir . > output.patch) @@ identifier S, member, array; type T1, T2; @@ struct S { ... T1 member; T2 array[ - 0 ]; }; -fstrict-flex-arrays=3 is coming and we need to land these changes to prevent issues like these in the short future: ../fs/minix/dir.c:337:3: warning: 'strcpy' will always overflow; destination buffer has size 0, but the source string has length 2 (including NUL byte) [-Wfortify-source] strcpy(de3->name, "."); ^ Since these are all [0] to [] changes, the risk to UAPI is nearly zero. If this breaks anything, we can use a union with a new member name. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.16/process/deprecated.html#zero-length-and-one-element-arrays Link: https://github.com/KSPP/linux/issues/78 Build-tested-by: kernel test robot <lkp@intel.com> Link: https://lore.kernel.org/lkml/62b675ec.wKX6AOZ6cbE71vtF%25lkp@intel.com/ Acked-by: Dan Williams <dan.j.williams@intel.com> # For ndctl.h Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
2022-04-07 00:36:51 +00:00
struct i915_engine_class_instance engines[];
} __attribute__((packed));
#define I915_DEFINE_CONTEXT_ENGINES_BOND(name__, N__) struct { \
struct i915_user_extension base; \
struct i915_engine_class_instance master; \
__u16 virtual_index; \
__u16 num_bonds; \
__u64 flags; \
__u64 mbz64[4]; \
struct i915_engine_class_instance engines[N__]; \
} __attribute__((packed)) name__
/**
* struct i915_context_engines_parallel_submit - Configure engine for
* parallel submission.
*
* Setup a slot in the context engine map to allow multiple BBs to be submitted
* in a single execbuf IOCTL. Those BBs will then be scheduled to run on the GPU
* in parallel. Multiple hardware contexts are created internally in the i915 to
* run these BBs. Once a slot is configured for N BBs only N BBs can be
* submitted in each execbuf IOCTL and this is implicit behavior e.g. The user
* doesn't tell the execbuf IOCTL there are N BBs, the execbuf IOCTL knows how
* many BBs there are based on the slot's configuration. The N BBs are the last
* N buffer objects or first N if I915_EXEC_BATCH_FIRST is set.
*
* The default placement behavior is to create implicit bonds between each
* context if each context maps to more than 1 physical engine (e.g. context is
* a virtual engine). Also we only allow contexts of same engine class and these
* contexts must be in logically contiguous order. Examples of the placement
* behavior are described below. Lastly, the default is to not allow BBs to be
* preempted mid-batch. Rather insert coordinated preemption points on all
* hardware contexts between each set of BBs. Flags could be added in the future
* to change both of these default behaviors.
*
* Returns -EINVAL if hardware context placement configuration is invalid or if
* the placement configuration isn't supported on the platform / submission
* interface.
* Returns -ENODEV if extension isn't supported on the platform / submission
* interface.
*
* .. code-block:: none
*
* Examples syntax:
* CS[X] = generic engine of same class, logical instance X
* INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
*
* Example 1 pseudo code:
* set_engines(INVALID)
* set_parallel(engine_index=0, width=2, num_siblings=1,
* engines=CS[0],CS[1])
*
* Results in the following valid placement:
* CS[0], CS[1]
*
* Example 2 pseudo code:
* set_engines(INVALID)
* set_parallel(engine_index=0, width=2, num_siblings=2,
* engines=CS[0],CS[2],CS[1],CS[3])
*
* Results in the following valid placements:
* CS[0], CS[1]
* CS[2], CS[3]
*
* This can be thought of as two virtual engines, each containing two
* engines thereby making a 2D array. However, there are bonds tying the
* entries together and placing restrictions on how they can be scheduled.
* Specifically, the scheduler can choose only vertical columns from the 2D
* array. That is, CS[0] is bonded to CS[1] and CS[2] to CS[3]. So if the
* scheduler wants to submit to CS[0], it must also choose CS[1] and vice
* versa. Same for CS[2] requires also using CS[3].
* VE[0] = CS[0], CS[2]
* VE[1] = CS[1], CS[3]
*
* Example 3 pseudo code:
* set_engines(INVALID)
* set_parallel(engine_index=0, width=2, num_siblings=2,
* engines=CS[0],CS[1],CS[1],CS[3])
*
* Results in the following valid and invalid placements:
* CS[0], CS[1]
* CS[1], CS[3] - Not logically contiguous, return -EINVAL
*/
struct i915_context_engines_parallel_submit {
/**
* @base: base user extension.
*/
struct i915_user_extension base;
/**
* @engine_index: slot for parallel engine
*/
__u16 engine_index;
/**
* @width: number of contexts per parallel engine or in other words the
* number of batches in each submission
*/
__u16 width;
/**
* @num_siblings: number of siblings per context or in other words the
* number of possible placements for each submission
*/
__u16 num_siblings;
/**
* @mbz16: reserved for future use; must be zero
*/
__u16 mbz16;
/**
* @flags: all undefined flags must be zero, currently not defined flags
*/
__u64 flags;
/**
* @mbz64: reserved for future use; must be zero
*/
__u64 mbz64[3];
/**
* @engines: 2-d array of engine instances to configure parallel engine
*
* length = width (i) * num_siblings (j)
* index = j + i * num_siblings
*/
treewide: uapi: Replace zero-length arrays with flexible-array members There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. This code was transformed with the help of Coccinelle: (linux-5.19-rc2$ spatch --jobs $(getconf _NPROCESSORS_ONLN) --sp-file script.cocci --include-headers --dir . > output.patch) @@ identifier S, member, array; type T1, T2; @@ struct S { ... T1 member; T2 array[ - 0 ]; }; -fstrict-flex-arrays=3 is coming and we need to land these changes to prevent issues like these in the short future: ../fs/minix/dir.c:337:3: warning: 'strcpy' will always overflow; destination buffer has size 0, but the source string has length 2 (including NUL byte) [-Wfortify-source] strcpy(de3->name, "."); ^ Since these are all [0] to [] changes, the risk to UAPI is nearly zero. If this breaks anything, we can use a union with a new member name. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.16/process/deprecated.html#zero-length-and-one-element-arrays Link: https://github.com/KSPP/linux/issues/78 Build-tested-by: kernel test robot <lkp@intel.com> Link: https://lore.kernel.org/lkml/62b675ec.wKX6AOZ6cbE71vtF%25lkp@intel.com/ Acked-by: Dan Williams <dan.j.williams@intel.com> # For ndctl.h Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
2022-04-07 00:36:51 +00:00
struct i915_engine_class_instance engines[];
} __packed;
#define I915_DEFINE_CONTEXT_ENGINES_PARALLEL_SUBMIT(name__, N__) struct { \
struct i915_user_extension base; \
__u16 engine_index; \
__u16 width; \
__u16 num_siblings; \
__u16 mbz16; \
__u64 flags; \
__u64 mbz64[3]; \
struct i915_engine_class_instance engines[N__]; \
} __attribute__((packed)) name__
/**
* DOC: Context Engine Map uAPI
*
* Context engine map is a new way of addressing engines when submitting batch-
* buffers, replacing the existing way of using identifiers like `I915_EXEC_BLT`
* inside the flags field of `struct drm_i915_gem_execbuffer2`.
*
* To use it created GEM contexts need to be configured with a list of engines
* the user is intending to submit to. This is accomplished using the
* `I915_CONTEXT_PARAM_ENGINES` parameter and `struct
* i915_context_param_engines`.
*
* For such contexts the `I915_EXEC_RING_MASK` field becomes an index into the
* configured map.
*
* Example of creating such context and submitting against it:
*
* .. code-block:: C
*
* I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = {
* .engines = { { I915_ENGINE_CLASS_RENDER, 0 },
* { I915_ENGINE_CLASS_COPY, 0 } }
* };
* struct drm_i915_gem_context_create_ext_setparam p_engines = {
* .base = {
* .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
* },
* .param = {
* .param = I915_CONTEXT_PARAM_ENGINES,
* .value = to_user_pointer(&engines),
* .size = sizeof(engines),
* },
* };
* struct drm_i915_gem_context_create_ext create = {
* .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
* .extensions = to_user_pointer(&p_engines);
* };
*
* ctx_id = gem_context_create_ext(drm_fd, &create);
*
* // We have now created a GEM context with two engines in the map:
* // Index 0 points to rcs0 while index 1 points to bcs0. Other engines
* // will not be accessible from this context.
*
* ...
* execbuf.rsvd1 = ctx_id;
* execbuf.flags = 0; // Submits to index 0, which is rcs0 for this context
* gem_execbuf(drm_fd, &execbuf);
*
* ...
* execbuf.rsvd1 = ctx_id;
* execbuf.flags = 1; // Submits to index 0, which is bcs0 for this context
* gem_execbuf(drm_fd, &execbuf);
*/
drm/i915: Allow a context to define its set of engines Over the last few years, we have debated how to extend the user API to support an increase in the number of engines, that may be sparse and even be heterogeneous within a class (not all video decoders created equal). We settled on using (class, instance) tuples to identify a specific engine, with an API for the user to construct a map of engines to capabilities. Into this picture, we then add a challenge of virtual engines; one user engine that maps behind the scenes to any number of physical engines. To keep it general, we want the user to have full control over that mapping. To that end, we allow the user to constrain a context to define the set of engines that it can access, order fully controlled by the user via (class, instance). With such precise control in context setup, we can continue to use the existing execbuf uABI of specifying a single index; only now it doesn't automagically map onto the engines, it uses the user defined engine map from the context. v2: Fixup freeing of local on success of get_engines() v3: Allow empty engines[] v4: s/nengine/num_engines/ v5: Replace 64 limit on num_engines with a note that execbuf is currently limited to only using the first 64 engines. v6: Actually use the engines_mutex to guard the ctx->engines. Testcase: igt/gem_ctx_engines Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190521211134.16117-2-chris@chris-wilson.co.uk
2019-05-21 21:11:26 +00:00
struct i915_context_param_engines {
__u64 extensions; /* linked chain of extension blocks, 0 terminates */
drm/i915: Load balancing across a virtual engine Having allowed the user to define a set of engines that they will want to only use, we go one step further and allow them to bind those engines into a single virtual instance. Submitting a batch to the virtual engine will then forward it to any one of the set in a manner as best to distribute load. The virtual engine has a single timeline across all engines (it operates as a single queue), so it is not able to concurrently run batches across multiple engines by itself; that is left up to the user to submit multiple concurrent batches to multiple queues. Multiple users will be load balanced across the system. The mechanism used for load balancing in this patch is a late greedy balancer. When a request is ready for execution, it is added to each engine's queue, and when an engine is ready for its next request it claims it from the virtual engine. The first engine to do so, wins, i.e. the request is executed at the earliest opportunity (idle moment) in the system. As not all HW is created equal, the user is still able to skip the virtual engine and execute the batch on a specific engine, all within the same queue. It will then be executed in order on the correct engine, with execution on other virtual engines being moved away due to the load detection. A couple of areas for potential improvement left! - The virtual engine always take priority over equal-priority tasks. Mostly broken up by applying FQ_CODEL rules for prioritising new clients, and hopefully the virtual and real engines are not then congested (i.e. all work is via virtual engines, or all work is to the real engine). - We require the breadcrumb irq around every virtual engine request. For normal engines, we eliminate the need for the slow round trip via interrupt by using the submit fence and queueing in order. For virtual engines, we have to allow any job to transfer to a new ring, and cannot coalesce the submissions, so require the completion fence instead, forcing the persistent use of interrupts. - We only drip feed single requests through each virtual engine and onto the physical engines, even if there was enough work to fill all ELSP, leaving small stalls with an idle CS event at the end of every request. Could we be greedy and fill both slots? Being lazy is virtuous for load distribution on less-than-full workloads though. Other areas of improvement are more general, such as reducing lock contention, reducing dispatch overhead, looking at direct submission rather than bouncing around tasklets etc. sseu: Lift the restriction to allow sseu to be reconfigured on virtual engines composed of RENDER_CLASS (rcs). v2: macroize check_user_mbz() v3: Cancel virtual engines on wedging v4: Commence commenting v5: Replace 64b sibling_mask with a list of class:instance v6: Drop the one-element array in the uabi v7: Assert it is an virtual engine in to_virtual_engine() v8: Skip over holes in [class][inst] so we can selftest with (vcs0, vcs2) Link: https://github.com/intel/media-driver/pull/283 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190521211134.16117-6-chris@chris-wilson.co.uk
2019-05-21 21:11:30 +00:00
#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
#define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */
#define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */
struct i915_engine_class_instance engines[];
drm/i915: Allow a context to define its set of engines Over the last few years, we have debated how to extend the user API to support an increase in the number of engines, that may be sparse and even be heterogeneous within a class (not all video decoders created equal). We settled on using (class, instance) tuples to identify a specific engine, with an API for the user to construct a map of engines to capabilities. Into this picture, we then add a challenge of virtual engines; one user engine that maps behind the scenes to any number of physical engines. To keep it general, we want the user to have full control over that mapping. To that end, we allow the user to constrain a context to define the set of engines that it can access, order fully controlled by the user via (class, instance). With such precise control in context setup, we can continue to use the existing execbuf uABI of specifying a single index; only now it doesn't automagically map onto the engines, it uses the user defined engine map from the context. v2: Fixup freeing of local on success of get_engines() v3: Allow empty engines[] v4: s/nengine/num_engines/ v5: Replace 64 limit on num_engines with a note that execbuf is currently limited to only using the first 64 engines. v6: Actually use the engines_mutex to guard the ctx->engines. Testcase: igt/gem_ctx_engines Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190521211134.16117-2-chris@chris-wilson.co.uk
2019-05-21 21:11:26 +00:00
} __attribute__((packed));
#define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__, N__) struct { \
__u64 extensions; \
struct i915_engine_class_instance engines[N__]; \
} __attribute__((packed)) name__
/**
* struct drm_i915_gem_context_create_ext_setparam - Context parameter
* to set or query during context creation.
*/
struct drm_i915_gem_context_create_ext_setparam {
/** @base: Extension link. See struct i915_user_extension. */
struct i915_user_extension base;
/**
* @param: Context parameter to set or query.
* See struct drm_i915_gem_context_param.
*/
struct drm_i915_gem_context_param param;
};
struct drm_i915_gem_context_destroy {
__u32 ctx_id;
__u32 pad;
};
/**
* struct drm_i915_gem_vm_control - Structure to create or destroy VM.
*
* DRM_I915_GEM_VM_CREATE -
*
* Create a new virtual memory address space (ppGTT) for use within a context
* on the same file. Extensions can be provided to configure exactly how the
* address space is setup upon creation.
*
* The id of new VM (bound to the fd) for use with I915_CONTEXT_PARAM_VM is
* returned in the outparam @id.
*
* An extension chain maybe provided, starting with @extensions, and terminated
* by the @next_extension being 0. Currently, no extensions are defined.
*
* DRM_I915_GEM_VM_DESTROY -
*
* Destroys a previously created VM id, specified in @vm_id.
*
* No extensions or flags are allowed currently, and so must be zero.
*/
struct drm_i915_gem_vm_control {
/** @extensions: Zero-terminated chain of extensions. */
__u64 extensions;
/** @flags: reserved for future usage, currently MBZ */
__u32 flags;
/** @vm_id: Id of the VM created or to be destroyed */
__u32 vm_id;
};
struct drm_i915_reg_read {
/*
* Register offset.
* For 64bit wide registers where the upper 32bits don't immediately
* follow the lower 32bits, the offset of the lower 32bits must
* be specified
*/
__u64 offset;
#define I915_REG_READ_8B_WA (1ul << 0)
__u64 val; /* Return value */
};
/* Known registers:
*
* Render engine timestamp - 0x2358 + 64bit - gen7+
* - Note this register returns an invalid value if using the default
* single instruction 8byte read, in order to workaround that pass
* flag I915_REG_READ_8B_WA in offset field.
*
*/
/*
* struct drm_i915_reset_stats - Return global reset and other context stats
*
* Driver keeps few stats for each contexts and also global reset count.
* This struct can be used to query those stats.
*/
struct drm_i915_reset_stats {
/** @ctx_id: ID of the requested context */
__u32 ctx_id;
/** @flags: MBZ */
__u32 flags;
/** @reset_count: All resets since boot/module reload, for all contexts */
__u32 reset_count;
/** @batch_active: Number of batches lost when active in GPU, for this context */
__u32 batch_active;
/** @batch_pending: Number of batches lost pending for execution, for this context */
__u32 batch_pending;
/** @pad: MBZ */
__u32 pad;
};
/**
* struct drm_i915_gem_userptr - Create GEM object from user allocated memory.
*
* Userptr objects have several restrictions on what ioctls can be used with the
* object handle.
*/
struct drm_i915_gem_userptr {
/**
* @user_ptr: The pointer to the allocated memory.
*
* Needs to be aligned to PAGE_SIZE.
*/
__u64 user_ptr;
/**
* @user_size:
*
* The size in bytes for the allocated memory. This will also become the
* object size.
*
* Needs to be aligned to PAGE_SIZE, and should be at least PAGE_SIZE,
* or larger.
*/
__u64 user_size;
/**
* @flags:
*
* Supported flags:
*
* I915_USERPTR_READ_ONLY:
*
* Mark the object as readonly, this also means GPU access can only be
* readonly. This is only supported on HW which supports readonly access
* through the GTT. If the HW can't support readonly access, an error is
* returned.
*
drm/i915/userptr: Probe existence of backing struct pages upon creation Jason Ekstrand requested a more efficient method than userptr+set-domain to determine if the userptr object was backed by a complete set of pages upon creation. To be more efficient than simply populating the userptr using get_user_pages() (as done by the call to set-domain or execbuf), we can walk the tree of vm_area_struct and check for gaps or vma not backed by struct page (VM_PFNMAP). The question is how to handle VM_MIXEDMAP which may be either struct page or pfn backed... With discrete we are going to drop support for set_domain(), so offering a way to probe the pages, without having to resort to dummy batches has been requested. v2: - add new query param for the PROBE flag, so userspace can easily check if the kernel supports it(Jason). - use mmap_read_{lock, unlock}. - add some kernel-doc. v3: - In the docs also mention that PROBE doesn't guarantee that the pages will remain valid by the time they are actually used(Tvrtko). - Add a small comment for the hole finding logic(Jason). - Move the param next to all the other params which just return true. Testcase: igt/gem_userptr_blits/probe Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Matthew Auld <matthew.auld@intel.com> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Cc: Jordan Justen <jordan.l.justen@intel.com> Cc: Kenneth Graunke <kenneth@whitecape.org> Cc: Jason Ekstrand <jason@jlekstrand.net> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Ramalingam C <ramalingam.c@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Acked-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20210723113405.427004-1-matthew.auld@intel.com
2021-07-23 11:34:05 +00:00
* I915_USERPTR_PROBE:
*
* Probe the provided @user_ptr range and validate that the @user_ptr is
* indeed pointing to normal memory and that the range is also valid.
* For example if some garbage address is given to the kernel, then this
* should complain.
*
* Returns -EFAULT if the probe failed.
*
* Note that this doesn't populate the backing pages, and also doesn't
* guarantee that the object will remain valid when the object is
* eventually used.
*
* The kernel supports this feature if I915_PARAM_HAS_USERPTR_PROBE
* returns a non-zero value.
*
* I915_USERPTR_UNSYNCHRONIZED:
*
* NOT USED. Setting this flag will result in an error.
*/
__u32 flags;
#define I915_USERPTR_READ_ONLY 0x1
drm/i915/userptr: Probe existence of backing struct pages upon creation Jason Ekstrand requested a more efficient method than userptr+set-domain to determine if the userptr object was backed by a complete set of pages upon creation. To be more efficient than simply populating the userptr using get_user_pages() (as done by the call to set-domain or execbuf), we can walk the tree of vm_area_struct and check for gaps or vma not backed by struct page (VM_PFNMAP). The question is how to handle VM_MIXEDMAP which may be either struct page or pfn backed... With discrete we are going to drop support for set_domain(), so offering a way to probe the pages, without having to resort to dummy batches has been requested. v2: - add new query param for the PROBE flag, so userspace can easily check if the kernel supports it(Jason). - use mmap_read_{lock, unlock}. - add some kernel-doc. v3: - In the docs also mention that PROBE doesn't guarantee that the pages will remain valid by the time they are actually used(Tvrtko). - Add a small comment for the hole finding logic(Jason). - Move the param next to all the other params which just return true. Testcase: igt/gem_userptr_blits/probe Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Matthew Auld <matthew.auld@intel.com> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Cc: Jordan Justen <jordan.l.justen@intel.com> Cc: Kenneth Graunke <kenneth@whitecape.org> Cc: Jason Ekstrand <jason@jlekstrand.net> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Ramalingam C <ramalingam.c@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Acked-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20210723113405.427004-1-matthew.auld@intel.com
2021-07-23 11:34:05 +00:00
#define I915_USERPTR_PROBE 0x2
#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
/**
* @handle: Returned handle for the object.
*
* Object handles are nonzero.
*/
__u32 handle;
};
enum drm_i915_oa_format {
drm/i915/perf: Add OA unit support for Gen 8+ Enables access to OA unit metrics for BDW, CHV, SKL and BXT which all share (more-or-less) the same OA unit design. Of particular note in comparison to Haswell: some OA unit HW config state has become per-context state and as a consequence it is somewhat more complicated to manage synchronous state changes from the cpu while there's no guarantee of what context (if any) is currently actively running on the gpu. The periodic sampling frequency which can be particularly useful for system-wide analysis (as opposed to command stream synchronised MI_REPORT_PERF_COUNT commands) is perhaps the most surprising state to have become per-context save and restored (while the OABUFFER destination is still a shared, system-wide resource). This support for gen8+ takes care to consider a number of timing challenges involved in synchronously updating per-context state primarily by programming all config state from the cpu and updating all current and saved contexts synchronously while the OA unit is still disabled. The driver intentionally avoids depending on command streamer programming to update OA state considering the lack of synchronization between the automatic loading of OACTXCONTROL state (that includes the periodic sampling state and enable state) on context restore and the parsing of any general purpose BB the driver can control. I.e. this implementation is careful to avoid the possibility of a context restore temporarily enabling any out-of-date periodic sampling state. In addition to the risk of transiently-out-of-date state being loaded automatically; there are also internal HW latencies involved in the loading of MUX configurations which would be difficult to account for from the command streamer (and we only want to enable the unit when once the MUX configuration is complete). Since the Gen8+ OA unit design no longer supports clock gating the unit off for a single given context (which effectively stopped any progress of counters while any other context was running) and instead supports tagging OA reports with a context ID for filtering on the CPU, it means we can no longer hide the system-wide progress of counters from a non-privileged application only interested in metrics for its own context. Although we could theoretically try and subtract the progress of other contexts before forwarding reports via read() we aren't in a position to filter reports captured via MI_REPORT_PERF_COUNT commands. As a result, for Gen8+, we always require the dev.i915.perf_stream_paranoid to be unset for any access to OA metrics if not root. v5: Drain submitted requests when enabling metric set to ensure no lite-restore erases the context image we just updated (Lionel) v6: In addition to drain, switch to kernel context & update all context in place (Chris) v7: Add missing mutex_unlock() if switching to kernel context fails (Matthew) v8: Simplify OA period/flex-eu-counters programming by using the batchbuffer instead of modifying ctx-image (Lionel) v9: Back to updating the context image (due to erroneous testing, batchbuffer programming the OA unit doesn't actually work) (Lionel) Pin context before updating context image (Chris) Drop MMIO programming now that we switch to a kernel context with right values in initial context image (Chris) v10: Just pin_map the contexts we want to modify or let the configuration happen on first use (Chris) v11: Update kernel context OA config through the batchbuffer rather than on the fly ctx-image update (Lionel) v12: Rework OA context registers update again by swithing away from user contexts and reconfiguring the kernel context through the batchbuffer and updating all the other contexts' context image. Also take care to lock slice/subslice configuration when OA is on. (Lionel) v13: Request rpcs updates on all engine when updating the OA config (Lionel) v14: Drop any kind of rpcs management now that we monitor sseu configuration changes in a later patch (Lionel) Remove usleep after programming the NOA configs on Gen8+, this doesn't seem to be needed (Lionel) v15: Respect coding style for block comments (Chris) v16: Add missing i915_add_request() in case we fail to emit OA configuration (Matthew) Signed-off-by: Robert Bragg <robert@sixbynine.org> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Matthew Auld <matthew.auld@intel.com> \o/ Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
2017-06-13 11:23:03 +00:00
I915_OA_FORMAT_A13 = 1, /* HSW only */
I915_OA_FORMAT_A29, /* HSW only */
I915_OA_FORMAT_A13_B8_C8, /* HSW only */
I915_OA_FORMAT_B4_C8, /* HSW only */
I915_OA_FORMAT_A45_B8_C8, /* HSW only */
I915_OA_FORMAT_B4_C8_A16, /* HSW only */
I915_OA_FORMAT_C4_B8, /* HSW+ */
/* Gen8+ */
I915_OA_FORMAT_A12,
I915_OA_FORMAT_A12_B8_C8,
I915_OA_FORMAT_A32u40_A4u32_B8_C8,
/* DG2 */
I915_OAR_FORMAT_A32u40_A4u32_B8_C8,
I915_OA_FORMAT_A24u40_A14u32_B8_C8,
/* MTL OAM */
I915_OAM_FORMAT_MPEC8u64_B8_C8,
I915_OAM_FORMAT_MPEC8u32_B8_C8,
I915_OA_FORMAT_MAX /* non-ABI */
};
drm/i915: Add i915 perf infrastructure Adds base i915 perf infrastructure for Gen performance metrics. This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64 properties to configure a stream of metrics and returns a new fd usable with standard VFS system calls including read() to read typed and sized records; ioctl() to enable or disable capture and poll() to wait for data. A stream is opened something like: uint64_t properties[] = { /* Single context sampling */ DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle, /* Include OA reports in samples */ DRM_I915_PERF_PROP_SAMPLE_OA, true, /* OA unit configuration */ DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id, DRM_I915_PERF_PROP_OA_FORMAT, report_format, DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent, }; struct drm_i915_perf_open_param parm = { .flags = I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK | I915_PERF_FLAG_DISABLED, .properties_ptr = (uint64_t)properties, .num_properties = sizeof(properties) / 16, }; int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param); Records read all start with a common { type, size } header with DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records contain an extensible number of fields and it's the DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that determine what's included in every sample. No specific streams are supported yet so any attempt to open a stream will return an error. v2: use i915_gem_context_get() - Chris Wilson v3: update read() interface to avoid passing state struct - Chris Wilson fix some rebase fallout, with i915-perf init/deinit v4: s/DRM_IORW/DRM_IOW/ - Emil Velikov Signed-off-by: Robert Bragg <robert@sixbynine.org> Reviewed-by: Matthew Auld <matthew.auld@intel.com> Reviewed-by: Sourab Gupta <sourab.gupta@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-07 19:49:47 +00:00
enum drm_i915_perf_property_id {
/**
* Open the stream for a specific context handle (as used with
* execbuffer2). A stream opened for a specific context this way
* won't typically require root privileges.
*
* This property is available in perf revision 1.
drm/i915: Add i915 perf infrastructure Adds base i915 perf infrastructure for Gen performance metrics. This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64 properties to configure a stream of metrics and returns a new fd usable with standard VFS system calls including read() to read typed and sized records; ioctl() to enable or disable capture and poll() to wait for data. A stream is opened something like: uint64_t properties[] = { /* Single context sampling */ DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle, /* Include OA reports in samples */ DRM_I915_PERF_PROP_SAMPLE_OA, true, /* OA unit configuration */ DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id, DRM_I915_PERF_PROP_OA_FORMAT, report_format, DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent, }; struct drm_i915_perf_open_param parm = { .flags = I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK | I915_PERF_FLAG_DISABLED, .properties_ptr = (uint64_t)properties, .num_properties = sizeof(properties) / 16, }; int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param); Records read all start with a common { type, size } header with DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records contain an extensible number of fields and it's the DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that determine what's included in every sample. No specific streams are supported yet so any attempt to open a stream will return an error. v2: use i915_gem_context_get() - Chris Wilson v3: update read() interface to avoid passing state struct - Chris Wilson fix some rebase fallout, with i915-perf init/deinit v4: s/DRM_IORW/DRM_IOW/ - Emil Velikov Signed-off-by: Robert Bragg <robert@sixbynine.org> Reviewed-by: Matthew Auld <matthew.auld@intel.com> Reviewed-by: Sourab Gupta <sourab.gupta@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-07 19:49:47 +00:00
*/
DRM_I915_PERF_PROP_CTX_HANDLE = 1,
/**
* A value of 1 requests the inclusion of raw OA unit reports as
* part of stream samples.
*
* This property is available in perf revision 1.
*/
DRM_I915_PERF_PROP_SAMPLE_OA,
/**
* The value specifies which set of OA unit metrics should be
* configured, defining the contents of any OA unit reports.
*
* This property is available in perf revision 1.
*/
DRM_I915_PERF_PROP_OA_METRICS_SET,
/**
* The value specifies the size and layout of OA unit reports.
*
* This property is available in perf revision 1.
*/
DRM_I915_PERF_PROP_OA_FORMAT,
/**
* Specifying this property implicitly requests periodic OA unit
* sampling and (at least on Haswell) the sampling frequency is derived
* from this exponent as follows:
*
* 80ns * 2^(period_exponent + 1)
*
* This property is available in perf revision 1.
*/
DRM_I915_PERF_PROP_OA_EXPONENT,
drm/i915/perf: allow holding preemption on filtered ctx We would like to make use of perf in Vulkan. The Vulkan API is much lower level than OpenGL, with applications directly exposed to the concept of command buffers (pretty much equivalent to our batch buffers). In Vulkan, queries are always limited in scope to a command buffer. In OpenGL, the lack of command buffer concept meant that queries' duration could span multiple command buffers. With that restriction gone in Vulkan, we would like to simplify measuring performance just by measuring the deltas between the counter snapshots written by 2 MI_RECORD_PERF_COUNT commands, rather than the more complex scheme we currently have in the GL driver, using 2 MI_RECORD_PERF_COUNT commands and doing some post processing on the stream of OA reports, coming from the global OA buffer, to remove any unrelated deltas in between the 2 MI_RECORD_PERF_COUNT. Disabling preemption only apply to a single context with which want to query performance counters for and is considered a privileged operation, by default protected by CAP_SYS_ADMIN. It is possible to enable it for a normal user by disabling the paranoid stream setting. v2: Store preemption setting in intel_context (Chris) v3: Use priorities to avoid preemption rather than the HW mechanism v4: Just modify the port priority reporting function v5: Add nopreempt flag on gem context and always flag requests appropriately, regarless of OA reconfiguration. Link: https://gitlab.freedesktop.org/mesa/mesa/merge_requests/932 Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20191014201404.22468-4-chris@chris-wilson.co.uk
2019-10-14 20:14:04 +00:00
/**
* Specifying this property is only valid when specify a context to
* filter with DRM_I915_PERF_PROP_CTX_HANDLE. Specifying this property
* will hold preemption of the particular context we want to gather
* performance data about. The execbuf2 submissions must include a
* drm_i915_gem_execbuffer_ext_perf parameter for this to apply.
*
* This property is available in perf revision 3.
*/
DRM_I915_PERF_PROP_HOLD_PREEMPTION,
drm/i915/perf: introduce global sseu pinning On Gen11 powergating half the execution units is a functional requirement when using the VME samplers. Not fullfilling this requirement can lead to hangs. This unfortunately plays fairly poorly with the NOA requirements. NOA requires a stable power configuration to maintain its configuration. As a result using OA (and NOA feeding into it) so far has required us to use a power configuration that can work for all contexts. The only power configuration fullfilling this is powergating half the execution units. This makes performance analysis for 3D workloads somewhat pointless. Failing to find a solution that would work for everybody, this change introduces a new i915-perf stream open parameter that punts the decision off to userspace. If this parameter is omitted, the existing Gen11 behavior remains (half EU array powergating). This change takes the initiative to move all perf related sseu configuration into i915_perf.c v2: Make parameter priviliged if different from default v3: Fix context modifying its sseu config while i915-perf is enabled v4: Always consider global sseu a privileged operation (Tvrtko) Override req_sseu point in intel_sseu_make_rpcs() (Tvrtko) Remove unrelated changes (Tvrtko) v5: Some typos (Tvrtko) Process sseu param in read_properties_unlocked() (Tvrtko) v6: Actually commit the bits from v5... Fixup some checkpath warnings v7: Only compare engine uabi field (Chris) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20200317132222.2638719-3-lionel.g.landwerlin@intel.com
2020-03-17 13:22:22 +00:00
/**
* Specifying this pins all contexts to the specified SSEU power
* configuration for the duration of the recording.
*
* This parameter's value is a pointer to a struct
* drm_i915_gem_context_param_sseu.
*
* This property is available in perf revision 4.
*/
DRM_I915_PERF_PROP_GLOBAL_SSEU,
/**
* This optional parameter specifies the timer interval in nanoseconds
* at which the i915 driver will check the OA buffer for available data.
* Minimum allowed value is 100 microseconds. A default value is used by
* the driver if this parameter is not specified. Note that larger timer
* values will reduce cpu consumption during OA perf captures. However,
* excessively large values would potentially result in OA buffer
* overwrites as captures reach end of the OA buffer.
*
* This property is available in perf revision 5.
*/
DRM_I915_PERF_PROP_POLL_OA_PERIOD,
/**
* Multiple engines may be mapped to the same OA unit. The OA unit is
* identified by class:instance of any engine mapped to it.
*
* This parameter specifies the engine class and must be passed along
* with DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE.
*
* This property is available in perf revision 6.
*/
DRM_I915_PERF_PROP_OA_ENGINE_CLASS,
/**
* This parameter specifies the engine instance and must be passed along
* with DRM_I915_PERF_PROP_OA_ENGINE_CLASS.
*
* This property is available in perf revision 6.
*/
DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE,
drm/i915: Add i915 perf infrastructure Adds base i915 perf infrastructure for Gen performance metrics. This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64 properties to configure a stream of metrics and returns a new fd usable with standard VFS system calls including read() to read typed and sized records; ioctl() to enable or disable capture and poll() to wait for data. A stream is opened something like: uint64_t properties[] = { /* Single context sampling */ DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle, /* Include OA reports in samples */ DRM_I915_PERF_PROP_SAMPLE_OA, true, /* OA unit configuration */ DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id, DRM_I915_PERF_PROP_OA_FORMAT, report_format, DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent, }; struct drm_i915_perf_open_param parm = { .flags = I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK | I915_PERF_FLAG_DISABLED, .properties_ptr = (uint64_t)properties, .num_properties = sizeof(properties) / 16, }; int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param); Records read all start with a common { type, size } header with DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records contain an extensible number of fields and it's the DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that determine what's included in every sample. No specific streams are supported yet so any attempt to open a stream will return an error. v2: use i915_gem_context_get() - Chris Wilson v3: update read() interface to avoid passing state struct - Chris Wilson fix some rebase fallout, with i915-perf init/deinit v4: s/DRM_IORW/DRM_IOW/ - Emil Velikov Signed-off-by: Robert Bragg <robert@sixbynine.org> Reviewed-by: Matthew Auld <matthew.auld@intel.com> Reviewed-by: Sourab Gupta <sourab.gupta@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-07 19:49:47 +00:00
DRM_I915_PERF_PROP_MAX /* non-ABI */
};
struct drm_i915_perf_open_param {
__u32 flags;
#define I915_PERF_FLAG_FD_CLOEXEC (1<<0)
#define I915_PERF_FLAG_FD_NONBLOCK (1<<1)
#define I915_PERF_FLAG_DISABLED (1<<2)
/** The number of u64 (id, value) pairs */
__u32 num_properties;
/**
* Pointer to array of u64 (id, value) pairs configuring the stream
* to open.
*/
__u64 properties_ptr;
drm/i915: Add i915 perf infrastructure Adds base i915 perf infrastructure for Gen performance metrics. This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64 properties to configure a stream of metrics and returns a new fd usable with standard VFS system calls including read() to read typed and sized records; ioctl() to enable or disable capture and poll() to wait for data. A stream is opened something like: uint64_t properties[] = { /* Single context sampling */ DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle, /* Include OA reports in samples */ DRM_I915_PERF_PROP_SAMPLE_OA, true, /* OA unit configuration */ DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id, DRM_I915_PERF_PROP_OA_FORMAT, report_format, DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent, }; struct drm_i915_perf_open_param parm = { .flags = I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK | I915_PERF_FLAG_DISABLED, .properties_ptr = (uint64_t)properties, .num_properties = sizeof(properties) / 16, }; int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param); Records read all start with a common { type, size } header with DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records contain an extensible number of fields and it's the DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that determine what's included in every sample. No specific streams are supported yet so any attempt to open a stream will return an error. v2: use i915_gem_context_get() - Chris Wilson v3: update read() interface to avoid passing state struct - Chris Wilson fix some rebase fallout, with i915-perf init/deinit v4: s/DRM_IORW/DRM_IOW/ - Emil Velikov Signed-off-by: Robert Bragg <robert@sixbynine.org> Reviewed-by: Matthew Auld <matthew.auld@intel.com> Reviewed-by: Sourab Gupta <sourab.gupta@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-07 19:49:47 +00:00
};
/*
* Enable data capture for a stream that was either opened in a disabled state
* via I915_PERF_FLAG_DISABLED or was later disabled via
* I915_PERF_IOCTL_DISABLE.
*
* It is intended to be cheaper to disable and enable a stream than it may be
* to close and re-open a stream with the same configuration.
*
* It's undefined whether any pending data for the stream will be lost.
*
* This ioctl is available in perf revision 1.
*/
drm/i915: Add i915 perf infrastructure Adds base i915 perf infrastructure for Gen performance metrics. This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64 properties to configure a stream of metrics and returns a new fd usable with standard VFS system calls including read() to read typed and sized records; ioctl() to enable or disable capture and poll() to wait for data. A stream is opened something like: uint64_t properties[] = { /* Single context sampling */ DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle, /* Include OA reports in samples */ DRM_I915_PERF_PROP_SAMPLE_OA, true, /* OA unit configuration */ DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id, DRM_I915_PERF_PROP_OA_FORMAT, report_format, DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent, }; struct drm_i915_perf_open_param parm = { .flags = I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK | I915_PERF_FLAG_DISABLED, .properties_ptr = (uint64_t)properties, .num_properties = sizeof(properties) / 16, }; int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param); Records read all start with a common { type, size } header with DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records contain an extensible number of fields and it's the DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that determine what's included in every sample. No specific streams are supported yet so any attempt to open a stream will return an error. v2: use i915_gem_context_get() - Chris Wilson v3: update read() interface to avoid passing state struct - Chris Wilson fix some rebase fallout, with i915-perf init/deinit v4: s/DRM_IORW/DRM_IOW/ - Emil Velikov Signed-off-by: Robert Bragg <robert@sixbynine.org> Reviewed-by: Matthew Auld <matthew.auld@intel.com> Reviewed-by: Sourab Gupta <sourab.gupta@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-07 19:49:47 +00:00
#define I915_PERF_IOCTL_ENABLE _IO('i', 0x0)
/*
* Disable data capture for a stream.
*
* It is an error to try and read a stream that is disabled.
*
* This ioctl is available in perf revision 1.
*/
drm/i915: Add i915 perf infrastructure Adds base i915 perf infrastructure for Gen performance metrics. This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64 properties to configure a stream of metrics and returns a new fd usable with standard VFS system calls including read() to read typed and sized records; ioctl() to enable or disable capture and poll() to wait for data. A stream is opened something like: uint64_t properties[] = { /* Single context sampling */ DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle, /* Include OA reports in samples */ DRM_I915_PERF_PROP_SAMPLE_OA, true, /* OA unit configuration */ DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id, DRM_I915_PERF_PROP_OA_FORMAT, report_format, DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent, }; struct drm_i915_perf_open_param parm = { .flags = I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK | I915_PERF_FLAG_DISABLED, .properties_ptr = (uint64_t)properties, .num_properties = sizeof(properties) / 16, }; int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param); Records read all start with a common { type, size } header with DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records contain an extensible number of fields and it's the DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that determine what's included in every sample. No specific streams are supported yet so any attempt to open a stream will return an error. v2: use i915_gem_context_get() - Chris Wilson v3: update read() interface to avoid passing state struct - Chris Wilson fix some rebase fallout, with i915-perf init/deinit v4: s/DRM_IORW/DRM_IOW/ - Emil Velikov Signed-off-by: Robert Bragg <robert@sixbynine.org> Reviewed-by: Matthew Auld <matthew.auld@intel.com> Reviewed-by: Sourab Gupta <sourab.gupta@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-07 19:49:47 +00:00
#define I915_PERF_IOCTL_DISABLE _IO('i', 0x1)
/*
* Change metrics_set captured by a stream.
*
* If the stream is bound to a specific context, the configuration change
* will performed inline with that context such that it takes effect before
* the next execbuf submission.
*
* Returns the previously bound metrics set id, or a negative error code.
*
* This ioctl is available in perf revision 2.
*/
#define I915_PERF_IOCTL_CONFIG _IO('i', 0x2)
/*
drm/i915: Add i915 perf infrastructure Adds base i915 perf infrastructure for Gen performance metrics. This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64 properties to configure a stream of metrics and returns a new fd usable with standard VFS system calls including read() to read typed and sized records; ioctl() to enable or disable capture and poll() to wait for data. A stream is opened something like: uint64_t properties[] = { /* Single context sampling */ DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle, /* Include OA reports in samples */ DRM_I915_PERF_PROP_SAMPLE_OA, true, /* OA unit configuration */ DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id, DRM_I915_PERF_PROP_OA_FORMAT, report_format, DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent, }; struct drm_i915_perf_open_param parm = { .flags = I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK | I915_PERF_FLAG_DISABLED, .properties_ptr = (uint64_t)properties, .num_properties = sizeof(properties) / 16, }; int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param); Records read all start with a common { type, size } header with DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records contain an extensible number of fields and it's the DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that determine what's included in every sample. No specific streams are supported yet so any attempt to open a stream will return an error. v2: use i915_gem_context_get() - Chris Wilson v3: update read() interface to avoid passing state struct - Chris Wilson fix some rebase fallout, with i915-perf init/deinit v4: s/DRM_IORW/DRM_IOW/ - Emil Velikov Signed-off-by: Robert Bragg <robert@sixbynine.org> Reviewed-by: Matthew Auld <matthew.auld@intel.com> Reviewed-by: Sourab Gupta <sourab.gupta@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-07 19:49:47 +00:00
* Common to all i915 perf records
*/
struct drm_i915_perf_record_header {
__u32 type;
__u16 pad;
__u16 size;
};
enum drm_i915_perf_record_type {
/**
* Samples are the work horse record type whose contents are extensible
* and defined when opening an i915 perf stream based on the given
* properties.
*
* Boolean properties following the naming convention
* DRM_I915_PERF_SAMPLE_xyz_PROP request the inclusion of 'xyz' data in
* every sample.
*
* The order of these sample properties given by userspace has no
* affect on the ordering of data within a sample. The order is
drm/i915: Add i915 perf infrastructure Adds base i915 perf infrastructure for Gen performance metrics. This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64 properties to configure a stream of metrics and returns a new fd usable with standard VFS system calls including read() to read typed and sized records; ioctl() to enable or disable capture and poll() to wait for data. A stream is opened something like: uint64_t properties[] = { /* Single context sampling */ DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle, /* Include OA reports in samples */ DRM_I915_PERF_PROP_SAMPLE_OA, true, /* OA unit configuration */ DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id, DRM_I915_PERF_PROP_OA_FORMAT, report_format, DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent, }; struct drm_i915_perf_open_param parm = { .flags = I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK | I915_PERF_FLAG_DISABLED, .properties_ptr = (uint64_t)properties, .num_properties = sizeof(properties) / 16, }; int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param); Records read all start with a common { type, size } header with DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records contain an extensible number of fields and it's the DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that determine what's included in every sample. No specific streams are supported yet so any attempt to open a stream will return an error. v2: use i915_gem_context_get() - Chris Wilson v3: update read() interface to avoid passing state struct - Chris Wilson fix some rebase fallout, with i915-perf init/deinit v4: s/DRM_IORW/DRM_IOW/ - Emil Velikov Signed-off-by: Robert Bragg <robert@sixbynine.org> Reviewed-by: Matthew Auld <matthew.auld@intel.com> Reviewed-by: Sourab Gupta <sourab.gupta@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-07 19:49:47 +00:00
* documented here.
*
* struct {
* struct drm_i915_perf_record_header header;
*
* { u32 oa_report[]; } && DRM_I915_PERF_PROP_SAMPLE_OA
drm/i915: Add i915 perf infrastructure Adds base i915 perf infrastructure for Gen performance metrics. This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64 properties to configure a stream of metrics and returns a new fd usable with standard VFS system calls including read() to read typed and sized records; ioctl() to enable or disable capture and poll() to wait for data. A stream is opened something like: uint64_t properties[] = { /* Single context sampling */ DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle, /* Include OA reports in samples */ DRM_I915_PERF_PROP_SAMPLE_OA, true, /* OA unit configuration */ DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id, DRM_I915_PERF_PROP_OA_FORMAT, report_format, DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent, }; struct drm_i915_perf_open_param parm = { .flags = I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK | I915_PERF_FLAG_DISABLED, .properties_ptr = (uint64_t)properties, .num_properties = sizeof(properties) / 16, }; int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param); Records read all start with a common { type, size } header with DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records contain an extensible number of fields and it's the DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that determine what's included in every sample. No specific streams are supported yet so any attempt to open a stream will return an error. v2: use i915_gem_context_get() - Chris Wilson v3: update read() interface to avoid passing state struct - Chris Wilson fix some rebase fallout, with i915-perf init/deinit v4: s/DRM_IORW/DRM_IOW/ - Emil Velikov Signed-off-by: Robert Bragg <robert@sixbynine.org> Reviewed-by: Matthew Auld <matthew.auld@intel.com> Reviewed-by: Sourab Gupta <sourab.gupta@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-07 19:49:47 +00:00
* };
*/
DRM_I915_PERF_RECORD_SAMPLE = 1,
/*
* Indicates that one or more OA reports were not written by the
* hardware. This can happen for example if an MI_REPORT_PERF_COUNT
* command collides with periodic sampling - which would be more likely
* at higher sampling frequencies.
*/
DRM_I915_PERF_RECORD_OA_REPORT_LOST = 2,
/**
* An error occurred that resulted in all pending OA reports being lost.
*/
DRM_I915_PERF_RECORD_OA_BUFFER_LOST = 3,
drm/i915: Add i915 perf infrastructure Adds base i915 perf infrastructure for Gen performance metrics. This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64 properties to configure a stream of metrics and returns a new fd usable with standard VFS system calls including read() to read typed and sized records; ioctl() to enable or disable capture and poll() to wait for data. A stream is opened something like: uint64_t properties[] = { /* Single context sampling */ DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle, /* Include OA reports in samples */ DRM_I915_PERF_PROP_SAMPLE_OA, true, /* OA unit configuration */ DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id, DRM_I915_PERF_PROP_OA_FORMAT, report_format, DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent, }; struct drm_i915_perf_open_param parm = { .flags = I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK | I915_PERF_FLAG_DISABLED, .properties_ptr = (uint64_t)properties, .num_properties = sizeof(properties) / 16, }; int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param); Records read all start with a common { type, size } header with DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records contain an extensible number of fields and it's the DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that determine what's included in every sample. No specific streams are supported yet so any attempt to open a stream will return an error. v2: use i915_gem_context_get() - Chris Wilson v3: update read() interface to avoid passing state struct - Chris Wilson fix some rebase fallout, with i915-perf init/deinit v4: s/DRM_IORW/DRM_IOW/ - Emil Velikov Signed-off-by: Robert Bragg <robert@sixbynine.org> Reviewed-by: Matthew Auld <matthew.auld@intel.com> Reviewed-by: Sourab Gupta <sourab.gupta@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-07 19:49:47 +00:00
DRM_I915_PERF_RECORD_MAX /* non-ABI */
};
/**
* struct drm_i915_perf_oa_config
*
drm/i915/perf: Implement I915_PERF_ADD/REMOVE_CONFIG interface The motivation behind this new interface is expose at runtime the creation of new OA configs which can be used as part of the i915 perf open interface. This will enable the kernel to learn new configs which may be experimental, or otherwise not part of the core set currently available through the i915 perf interface. v2: Drop DRM_ERROR for userspace errors (Matthew) Add padding to userspace structure (Matthew) s/guid/uuid/ (Matthew) v3: Use u32 instead of int to iterate through registers (Matthew) v4: Lock access to dynamic config list (Lionel) v5: by Matthew: Fix uninitialized error values Fix incorrect unwiding when opening perf stream Use kmalloc_array() to store register Use uuid_is_valid() to valid config uuids Declare ioctls as write only Check padding members are set to 0 by Lionel: Return ENOENT rather than EINVAL when trying to remove non existing config v6: by Chris: Use ref counts for OA configs Store UUID in drm_i915_perf_oa_config rather then using pointer Shuffle fields of drm_i915_perf_oa_config to avoid padding v7: by Chris Rename uapi pointers fields to end with '_ptr' v8: by Andrzej, Marek, Sebastian Update register whitelisting by Lionel Add more register names for documentation Allow configuration programming in non-paranoid mode Add support for value filter for a couple of registers already programmed in other part of the kernel v9: Documentation fix (Lionel) Allow writing WAIT_FOR_RC6_EXIT only on Gen8+ (Andrzej) v10: Perform read access_ok() on register pointers (Lionel) Signed-off-by: Matthew Auld <matthew.auld@intel.com> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Signed-off-by: Andrzej Datczuk <andrzej.datczuk@intel.com> Reviewed-by: Andrzej Datczuk <andrzej.datczuk@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20170803165812.2373-2-lionel.g.landwerlin@intel.com
2017-08-03 17:05:50 +00:00
* Structure to upload perf dynamic configuration into the kernel.
*/
struct drm_i915_perf_oa_config {
/**
* @uuid:
*
* String formatted like "%\08x-%\04x-%\04x-%\04x-%\012x"
*/
drm/i915/perf: Implement I915_PERF_ADD/REMOVE_CONFIG interface The motivation behind this new interface is expose at runtime the creation of new OA configs which can be used as part of the i915 perf open interface. This will enable the kernel to learn new configs which may be experimental, or otherwise not part of the core set currently available through the i915 perf interface. v2: Drop DRM_ERROR for userspace errors (Matthew) Add padding to userspace structure (Matthew) s/guid/uuid/ (Matthew) v3: Use u32 instead of int to iterate through registers (Matthew) v4: Lock access to dynamic config list (Lionel) v5: by Matthew: Fix uninitialized error values Fix incorrect unwiding when opening perf stream Use kmalloc_array() to store register Use uuid_is_valid() to valid config uuids Declare ioctls as write only Check padding members are set to 0 by Lionel: Return ENOENT rather than EINVAL when trying to remove non existing config v6: by Chris: Use ref counts for OA configs Store UUID in drm_i915_perf_oa_config rather then using pointer Shuffle fields of drm_i915_perf_oa_config to avoid padding v7: by Chris Rename uapi pointers fields to end with '_ptr' v8: by Andrzej, Marek, Sebastian Update register whitelisting by Lionel Add more register names for documentation Allow configuration programming in non-paranoid mode Add support for value filter for a couple of registers already programmed in other part of the kernel v9: Documentation fix (Lionel) Allow writing WAIT_FOR_RC6_EXIT only on Gen8+ (Andrzej) v10: Perform read access_ok() on register pointers (Lionel) Signed-off-by: Matthew Auld <matthew.auld@intel.com> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Signed-off-by: Andrzej Datczuk <andrzej.datczuk@intel.com> Reviewed-by: Andrzej Datczuk <andrzej.datczuk@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20170803165812.2373-2-lionel.g.landwerlin@intel.com
2017-08-03 17:05:50 +00:00
char uuid[36];
/**
* @n_mux_regs:
*
* Number of mux regs in &mux_regs_ptr.
*/
drm/i915/perf: Implement I915_PERF_ADD/REMOVE_CONFIG interface The motivation behind this new interface is expose at runtime the creation of new OA configs which can be used as part of the i915 perf open interface. This will enable the kernel to learn new configs which may be experimental, or otherwise not part of the core set currently available through the i915 perf interface. v2: Drop DRM_ERROR for userspace errors (Matthew) Add padding to userspace structure (Matthew) s/guid/uuid/ (Matthew) v3: Use u32 instead of int to iterate through registers (Matthew) v4: Lock access to dynamic config list (Lionel) v5: by Matthew: Fix uninitialized error values Fix incorrect unwiding when opening perf stream Use kmalloc_array() to store register Use uuid_is_valid() to valid config uuids Declare ioctls as write only Check padding members are set to 0 by Lionel: Return ENOENT rather than EINVAL when trying to remove non existing config v6: by Chris: Use ref counts for OA configs Store UUID in drm_i915_perf_oa_config rather then using pointer Shuffle fields of drm_i915_perf_oa_config to avoid padding v7: by Chris Rename uapi pointers fields to end with '_ptr' v8: by Andrzej, Marek, Sebastian Update register whitelisting by Lionel Add more register names for documentation Allow configuration programming in non-paranoid mode Add support for value filter for a couple of registers already programmed in other part of the kernel v9: Documentation fix (Lionel) Allow writing WAIT_FOR_RC6_EXIT only on Gen8+ (Andrzej) v10: Perform read access_ok() on register pointers (Lionel) Signed-off-by: Matthew Auld <matthew.auld@intel.com> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Signed-off-by: Andrzej Datczuk <andrzej.datczuk@intel.com> Reviewed-by: Andrzej Datczuk <andrzej.datczuk@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20170803165812.2373-2-lionel.g.landwerlin@intel.com
2017-08-03 17:05:50 +00:00
__u32 n_mux_regs;
/**
* @n_boolean_regs:
*
* Number of boolean regs in &boolean_regs_ptr.
*/
drm/i915/perf: Implement I915_PERF_ADD/REMOVE_CONFIG interface The motivation behind this new interface is expose at runtime the creation of new OA configs which can be used as part of the i915 perf open interface. This will enable the kernel to learn new configs which may be experimental, or otherwise not part of the core set currently available through the i915 perf interface. v2: Drop DRM_ERROR for userspace errors (Matthew) Add padding to userspace structure (Matthew) s/guid/uuid/ (Matthew) v3: Use u32 instead of int to iterate through registers (Matthew) v4: Lock access to dynamic config list (Lionel) v5: by Matthew: Fix uninitialized error values Fix incorrect unwiding when opening perf stream Use kmalloc_array() to store register Use uuid_is_valid() to valid config uuids Declare ioctls as write only Check padding members are set to 0 by Lionel: Return ENOENT rather than EINVAL when trying to remove non existing config v6: by Chris: Use ref counts for OA configs Store UUID in drm_i915_perf_oa_config rather then using pointer Shuffle fields of drm_i915_perf_oa_config to avoid padding v7: by Chris Rename uapi pointers fields to end with '_ptr' v8: by Andrzej, Marek, Sebastian Update register whitelisting by Lionel Add more register names for documentation Allow configuration programming in non-paranoid mode Add support for value filter for a couple of registers already programmed in other part of the kernel v9: Documentation fix (Lionel) Allow writing WAIT_FOR_RC6_EXIT only on Gen8+ (Andrzej) v10: Perform read access_ok() on register pointers (Lionel) Signed-off-by: Matthew Auld <matthew.auld@intel.com> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Signed-off-by: Andrzej Datczuk <andrzej.datczuk@intel.com> Reviewed-by: Andrzej Datczuk <andrzej.datczuk@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20170803165812.2373-2-lionel.g.landwerlin@intel.com
2017-08-03 17:05:50 +00:00
__u32 n_boolean_regs;
/**
* @n_flex_regs:
*
* Number of flex regs in &flex_regs_ptr.
*/
drm/i915/perf: Implement I915_PERF_ADD/REMOVE_CONFIG interface The motivation behind this new interface is expose at runtime the creation of new OA configs which can be used as part of the i915 perf open interface. This will enable the kernel to learn new configs which may be experimental, or otherwise not part of the core set currently available through the i915 perf interface. v2: Drop DRM_ERROR for userspace errors (Matthew) Add padding to userspace structure (Matthew) s/guid/uuid/ (Matthew) v3: Use u32 instead of int to iterate through registers (Matthew) v4: Lock access to dynamic config list (Lionel) v5: by Matthew: Fix uninitialized error values Fix incorrect unwiding when opening perf stream Use kmalloc_array() to store register Use uuid_is_valid() to valid config uuids Declare ioctls as write only Check padding members are set to 0 by Lionel: Return ENOENT rather than EINVAL when trying to remove non existing config v6: by Chris: Use ref counts for OA configs Store UUID in drm_i915_perf_oa_config rather then using pointer Shuffle fields of drm_i915_perf_oa_config to avoid padding v7: by Chris Rename uapi pointers fields to end with '_ptr' v8: by Andrzej, Marek, Sebastian Update register whitelisting by Lionel Add more register names for documentation Allow configuration programming in non-paranoid mode Add support for value filter for a couple of registers already programmed in other part of the kernel v9: Documentation fix (Lionel) Allow writing WAIT_FOR_RC6_EXIT only on Gen8+ (Andrzej) v10: Perform read access_ok() on register pointers (Lionel) Signed-off-by: Matthew Auld <matthew.auld@intel.com> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Signed-off-by: Andrzej Datczuk <andrzej.datczuk@intel.com> Reviewed-by: Andrzej Datczuk <andrzej.datczuk@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20170803165812.2373-2-lionel.g.landwerlin@intel.com
2017-08-03 17:05:50 +00:00
__u32 n_flex_regs;
/**
* @mux_regs_ptr:
*
* Pointer to tuples of u32 values (register address, value) for mux
* registers. Expected length of buffer is (2 * sizeof(u32) *
* &n_mux_regs).
*/
__u64 mux_regs_ptr;
/**
* @boolean_regs_ptr:
*
* Pointer to tuples of u32 values (register address, value) for mux
* registers. Expected length of buffer is (2 * sizeof(u32) *
* &n_boolean_regs).
*/
__u64 boolean_regs_ptr;
/**
* @flex_regs_ptr:
*
* Pointer to tuples of u32 values (register address, value) for mux
* registers. Expected length of buffer is (2 * sizeof(u32) *
* &n_flex_regs).
*/
__u64 flex_regs_ptr;
drm/i915/perf: Implement I915_PERF_ADD/REMOVE_CONFIG interface The motivation behind this new interface is expose at runtime the creation of new OA configs which can be used as part of the i915 perf open interface. This will enable the kernel to learn new configs which may be experimental, or otherwise not part of the core set currently available through the i915 perf interface. v2: Drop DRM_ERROR for userspace errors (Matthew) Add padding to userspace structure (Matthew) s/guid/uuid/ (Matthew) v3: Use u32 instead of int to iterate through registers (Matthew) v4: Lock access to dynamic config list (Lionel) v5: by Matthew: Fix uninitialized error values Fix incorrect unwiding when opening perf stream Use kmalloc_array() to store register Use uuid_is_valid() to valid config uuids Declare ioctls as write only Check padding members are set to 0 by Lionel: Return ENOENT rather than EINVAL when trying to remove non existing config v6: by Chris: Use ref counts for OA configs Store UUID in drm_i915_perf_oa_config rather then using pointer Shuffle fields of drm_i915_perf_oa_config to avoid padding v7: by Chris Rename uapi pointers fields to end with '_ptr' v8: by Andrzej, Marek, Sebastian Update register whitelisting by Lionel Add more register names for documentation Allow configuration programming in non-paranoid mode Add support for value filter for a couple of registers already programmed in other part of the kernel v9: Documentation fix (Lionel) Allow writing WAIT_FOR_RC6_EXIT only on Gen8+ (Andrzej) v10: Perform read access_ok() on register pointers (Lionel) Signed-off-by: Matthew Auld <matthew.auld@intel.com> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Signed-off-by: Andrzej Datczuk <andrzej.datczuk@intel.com> Reviewed-by: Andrzej Datczuk <andrzej.datczuk@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20170803165812.2373-2-lionel.g.landwerlin@intel.com
2017-08-03 17:05:50 +00:00
};
/**
* struct drm_i915_query_item - An individual query for the kernel to process.
*
* The behaviour is determined by the @query_id. Note that exactly what
* @data_ptr is also depends on the specific @query_id.
*/
drm/i915: add query uAPI There are a number of information that are readable from hardware registers and that we would like to make accessible to userspace. One particular example is the topology of the execution units (how are execution units grouped in subslices and slices and also which ones have been fused off for die recovery). At the moment the GET_PARAM ioctl covers some basic needs, but generally is only able to return a single value for each defined parameter. This is a bit problematic with topology descriptions which are array/maps of available units. This change introduces a new ioctl that can deal with requests to fill structures of potentially variable lengths. The user is expected fill a query with length fields set at 0 on the first call, the kernel then sets the length fields to the their expected values. A second call to the kernel with length fields at their expected values will trigger a copy of the data to the pointed memory locations. The scope of this uAPI is only to provide information to userspace, not to allow configuration of the device. v2: Simplify dispatcher code iteration (Tvrtko) Tweak uapi drm_i915_query_item structure (Tvrtko) v3: Rename pad fields into flags (Chris) Return error on flags field != 0 (Chris) Only copy length back to userspace in drm_i915_query_item (Chris) v4: Use array of functions instead of switch (Chris) v5: More comments in uapi (Tvrtko) Return query item errors in length field (All) v6: Tweak uapi comments style to match the coding style (Lionel) v7: Add i915_query.h (Joonas) v8: (Lionel) Change the behavior of the item iterator to report invalid queries into the query item rather than stopping the iteration. This enables userspace applications to query newer items on older kernels and only have failure on the items that are not supported. v9: Edit copyright headers (Joonas) v10: Typos & comments in uapi (Joonas) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Acked-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-6-lionel.g.landwerlin@intel.com
2018-03-06 12:28:56 +00:00
struct drm_i915_query_item {
/**
* @query_id:
*
* The id for this query. Currently accepted query IDs are:
* - %DRM_I915_QUERY_TOPOLOGY_INFO (see struct drm_i915_query_topology_info)
* - %DRM_I915_QUERY_ENGINE_INFO (see struct drm_i915_engine_info)
* - %DRM_I915_QUERY_PERF_CONFIG (see struct drm_i915_query_perf_config)
* - %DRM_I915_QUERY_MEMORY_REGIONS (see struct drm_i915_query_memory_regions)
* - %DRM_I915_QUERY_HWCONFIG_BLOB (see `GuC HWCONFIG blob uAPI`)
drm/i915/uapi: Add DRM_I915_QUERY_GEOMETRY_SUBSLICES Newer platforms have DSS that aren't necessarily available for both geometry and compute, two queries will need to exist. This introduces the first, when passing a valid engine class and engine instance in the flags returns a topology describing geometry. Based on past discussion, we currently only support this new query item on Xe_HP and beyond; earlier platforms do not need to worry about geometry and compute pipelines having access to different topology and should continue to use the existing topology query. v2: fix white space errors v3: change flags from hosting 2 8 bit numbers to holding a i915_engine_class_instance struct v4: add error if non rcs engine passed. v5 (by MattR): - Improve kerneldoc and cross references to related structs/enums. (Daniel) - Clarify that geometry query is only supported on render engines (Francisco) - Clarify that the new query is only supported on Xe_HP+. - Fix checkpatch warnings. Cc: Ashutosh Dixit <ashutosh.dixit@intel.com> Cc: Matt Roper <matthew.d.roper@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Francisco Jerez <currojerez@riseup.net> UMD (mesa): https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14143 Testcase: igt@i915_query@test-query-geometry-subslices Signed-off-by: Matt Atwood <matthew.s.atwood@intel.com> Signed-off-by: Matt Roper <matthew.d.roper@intel.com> Reviewed-by: Francisco Jerez <currojerez@riseup.net> Link: https://patchwork.freedesktop.org/patch/msgid/20220414192230.749771-4-matthew.d.roper@intel.com
2022-04-14 19:22:30 +00:00
* - %DRM_I915_QUERY_GEOMETRY_SUBSLICES (see struct drm_i915_query_topology_info)
* - %DRM_I915_QUERY_GUC_SUBMISSION_VERSION (see struct drm_i915_query_guc_submission_version)
*/
drm/i915: add query uAPI There are a number of information that are readable from hardware registers and that we would like to make accessible to userspace. One particular example is the topology of the execution units (how are execution units grouped in subslices and slices and also which ones have been fused off for die recovery). At the moment the GET_PARAM ioctl covers some basic needs, but generally is only able to return a single value for each defined parameter. This is a bit problematic with topology descriptions which are array/maps of available units. This change introduces a new ioctl that can deal with requests to fill structures of potentially variable lengths. The user is expected fill a query with length fields set at 0 on the first call, the kernel then sets the length fields to the their expected values. A second call to the kernel with length fields at their expected values will trigger a copy of the data to the pointed memory locations. The scope of this uAPI is only to provide information to userspace, not to allow configuration of the device. v2: Simplify dispatcher code iteration (Tvrtko) Tweak uapi drm_i915_query_item structure (Tvrtko) v3: Rename pad fields into flags (Chris) Return error on flags field != 0 (Chris) Only copy length back to userspace in drm_i915_query_item (Chris) v4: Use array of functions instead of switch (Chris) v5: More comments in uapi (Tvrtko) Return query item errors in length field (All) v6: Tweak uapi comments style to match the coding style (Lionel) v7: Add i915_query.h (Joonas) v8: (Lionel) Change the behavior of the item iterator to report invalid queries into the query item rather than stopping the iteration. This enables userspace applications to query newer items on older kernels and only have failure on the items that are not supported. v9: Edit copyright headers (Joonas) v10: Typos & comments in uapi (Joonas) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Acked-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-6-lionel.g.landwerlin@intel.com
2018-03-06 12:28:56 +00:00
__u64 query_id;
#define DRM_I915_QUERY_TOPOLOGY_INFO 1
#define DRM_I915_QUERY_ENGINE_INFO 2
#define DRM_I915_QUERY_PERF_CONFIG 3
#define DRM_I915_QUERY_MEMORY_REGIONS 4
#define DRM_I915_QUERY_HWCONFIG_BLOB 5
drm/i915/uapi: Add DRM_I915_QUERY_GEOMETRY_SUBSLICES Newer platforms have DSS that aren't necessarily available for both geometry and compute, two queries will need to exist. This introduces the first, when passing a valid engine class and engine instance in the flags returns a topology describing geometry. Based on past discussion, we currently only support this new query item on Xe_HP and beyond; earlier platforms do not need to worry about geometry and compute pipelines having access to different topology and should continue to use the existing topology query. v2: fix white space errors v3: change flags from hosting 2 8 bit numbers to holding a i915_engine_class_instance struct v4: add error if non rcs engine passed. v5 (by MattR): - Improve kerneldoc and cross references to related structs/enums. (Daniel) - Clarify that geometry query is only supported on render engines (Francisco) - Clarify that the new query is only supported on Xe_HP+. - Fix checkpatch warnings. Cc: Ashutosh Dixit <ashutosh.dixit@intel.com> Cc: Matt Roper <matthew.d.roper@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Francisco Jerez <currojerez@riseup.net> UMD (mesa): https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14143 Testcase: igt@i915_query@test-query-geometry-subslices Signed-off-by: Matt Atwood <matthew.s.atwood@intel.com> Signed-off-by: Matt Roper <matthew.d.roper@intel.com> Reviewed-by: Francisco Jerez <currojerez@riseup.net> Link: https://patchwork.freedesktop.org/patch/msgid/20220414192230.749771-4-matthew.d.roper@intel.com
2022-04-14 19:22:30 +00:00
#define DRM_I915_QUERY_GEOMETRY_SUBSLICES 6
#define DRM_I915_QUERY_GUC_SUBMISSION_VERSION 7
/* Must be kept compact -- no holes and well documented */
drm/i915: add query uAPI There are a number of information that are readable from hardware registers and that we would like to make accessible to userspace. One particular example is the topology of the execution units (how are execution units grouped in subslices and slices and also which ones have been fused off for die recovery). At the moment the GET_PARAM ioctl covers some basic needs, but generally is only able to return a single value for each defined parameter. This is a bit problematic with topology descriptions which are array/maps of available units. This change introduces a new ioctl that can deal with requests to fill structures of potentially variable lengths. The user is expected fill a query with length fields set at 0 on the first call, the kernel then sets the length fields to the their expected values. A second call to the kernel with length fields at their expected values will trigger a copy of the data to the pointed memory locations. The scope of this uAPI is only to provide information to userspace, not to allow configuration of the device. v2: Simplify dispatcher code iteration (Tvrtko) Tweak uapi drm_i915_query_item structure (Tvrtko) v3: Rename pad fields into flags (Chris) Return error on flags field != 0 (Chris) Only copy length back to userspace in drm_i915_query_item (Chris) v4: Use array of functions instead of switch (Chris) v5: More comments in uapi (Tvrtko) Return query item errors in length field (All) v6: Tweak uapi comments style to match the coding style (Lionel) v7: Add i915_query.h (Joonas) v8: (Lionel) Change the behavior of the item iterator to report invalid queries into the query item rather than stopping the iteration. This enables userspace applications to query newer items on older kernels and only have failure on the items that are not supported. v9: Edit copyright headers (Joonas) v10: Typos & comments in uapi (Joonas) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Acked-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-6-lionel.g.landwerlin@intel.com
2018-03-06 12:28:56 +00:00
/**
* @length:
*
drm/i915: add query uAPI There are a number of information that are readable from hardware registers and that we would like to make accessible to userspace. One particular example is the topology of the execution units (how are execution units grouped in subslices and slices and also which ones have been fused off for die recovery). At the moment the GET_PARAM ioctl covers some basic needs, but generally is only able to return a single value for each defined parameter. This is a bit problematic with topology descriptions which are array/maps of available units. This change introduces a new ioctl that can deal with requests to fill structures of potentially variable lengths. The user is expected fill a query with length fields set at 0 on the first call, the kernel then sets the length fields to the their expected values. A second call to the kernel with length fields at their expected values will trigger a copy of the data to the pointed memory locations. The scope of this uAPI is only to provide information to userspace, not to allow configuration of the device. v2: Simplify dispatcher code iteration (Tvrtko) Tweak uapi drm_i915_query_item structure (Tvrtko) v3: Rename pad fields into flags (Chris) Return error on flags field != 0 (Chris) Only copy length back to userspace in drm_i915_query_item (Chris) v4: Use array of functions instead of switch (Chris) v5: More comments in uapi (Tvrtko) Return query item errors in length field (All) v6: Tweak uapi comments style to match the coding style (Lionel) v7: Add i915_query.h (Joonas) v8: (Lionel) Change the behavior of the item iterator to report invalid queries into the query item rather than stopping the iteration. This enables userspace applications to query newer items on older kernels and only have failure on the items that are not supported. v9: Edit copyright headers (Joonas) v10: Typos & comments in uapi (Joonas) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Acked-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-6-lionel.g.landwerlin@intel.com
2018-03-06 12:28:56 +00:00
* When set to zero by userspace, this is filled with the size of the
* data to be written at the @data_ptr pointer. The kernel sets this
drm/i915: add query uAPI There are a number of information that are readable from hardware registers and that we would like to make accessible to userspace. One particular example is the topology of the execution units (how are execution units grouped in subslices and slices and also which ones have been fused off for die recovery). At the moment the GET_PARAM ioctl covers some basic needs, but generally is only able to return a single value for each defined parameter. This is a bit problematic with topology descriptions which are array/maps of available units. This change introduces a new ioctl that can deal with requests to fill structures of potentially variable lengths. The user is expected fill a query with length fields set at 0 on the first call, the kernel then sets the length fields to the their expected values. A second call to the kernel with length fields at their expected values will trigger a copy of the data to the pointed memory locations. The scope of this uAPI is only to provide information to userspace, not to allow configuration of the device. v2: Simplify dispatcher code iteration (Tvrtko) Tweak uapi drm_i915_query_item structure (Tvrtko) v3: Rename pad fields into flags (Chris) Return error on flags field != 0 (Chris) Only copy length back to userspace in drm_i915_query_item (Chris) v4: Use array of functions instead of switch (Chris) v5: More comments in uapi (Tvrtko) Return query item errors in length field (All) v6: Tweak uapi comments style to match the coding style (Lionel) v7: Add i915_query.h (Joonas) v8: (Lionel) Change the behavior of the item iterator to report invalid queries into the query item rather than stopping the iteration. This enables userspace applications to query newer items on older kernels and only have failure on the items that are not supported. v9: Edit copyright headers (Joonas) v10: Typos & comments in uapi (Joonas) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Acked-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-6-lionel.g.landwerlin@intel.com
2018-03-06 12:28:56 +00:00
* value to a negative value to signal an error on a particular query
* item.
*/
__s32 length;
/**
* @flags:
*
* When &query_id == %DRM_I915_QUERY_TOPOLOGY_INFO, must be 0.
*
* When &query_id == %DRM_I915_QUERY_PERF_CONFIG, must be one of the
* following:
*
* - %DRM_I915_QUERY_PERF_CONFIG_LIST
* - %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID
* - %DRM_I915_QUERY_PERF_CONFIG_FOR_UUID
drm/i915/uapi: Add DRM_I915_QUERY_GEOMETRY_SUBSLICES Newer platforms have DSS that aren't necessarily available for both geometry and compute, two queries will need to exist. This introduces the first, when passing a valid engine class and engine instance in the flags returns a topology describing geometry. Based on past discussion, we currently only support this new query item on Xe_HP and beyond; earlier platforms do not need to worry about geometry and compute pipelines having access to different topology and should continue to use the existing topology query. v2: fix white space errors v3: change flags from hosting 2 8 bit numbers to holding a i915_engine_class_instance struct v4: add error if non rcs engine passed. v5 (by MattR): - Improve kerneldoc and cross references to related structs/enums. (Daniel) - Clarify that geometry query is only supported on render engines (Francisco) - Clarify that the new query is only supported on Xe_HP+. - Fix checkpatch warnings. Cc: Ashutosh Dixit <ashutosh.dixit@intel.com> Cc: Matt Roper <matthew.d.roper@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Francisco Jerez <currojerez@riseup.net> UMD (mesa): https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14143 Testcase: igt@i915_query@test-query-geometry-subslices Signed-off-by: Matt Atwood <matthew.s.atwood@intel.com> Signed-off-by: Matt Roper <matthew.d.roper@intel.com> Reviewed-by: Francisco Jerez <currojerez@riseup.net> Link: https://patchwork.freedesktop.org/patch/msgid/20220414192230.749771-4-matthew.d.roper@intel.com
2022-04-14 19:22:30 +00:00
*
* When &query_id == %DRM_I915_QUERY_GEOMETRY_SUBSLICES must contain
* a struct i915_engine_class_instance that references a render engine.
drm/i915: add query uAPI There are a number of information that are readable from hardware registers and that we would like to make accessible to userspace. One particular example is the topology of the execution units (how are execution units grouped in subslices and slices and also which ones have been fused off for die recovery). At the moment the GET_PARAM ioctl covers some basic needs, but generally is only able to return a single value for each defined parameter. This is a bit problematic with topology descriptions which are array/maps of available units. This change introduces a new ioctl that can deal with requests to fill structures of potentially variable lengths. The user is expected fill a query with length fields set at 0 on the first call, the kernel then sets the length fields to the their expected values. A second call to the kernel with length fields at their expected values will trigger a copy of the data to the pointed memory locations. The scope of this uAPI is only to provide information to userspace, not to allow configuration of the device. v2: Simplify dispatcher code iteration (Tvrtko) Tweak uapi drm_i915_query_item structure (Tvrtko) v3: Rename pad fields into flags (Chris) Return error on flags field != 0 (Chris) Only copy length back to userspace in drm_i915_query_item (Chris) v4: Use array of functions instead of switch (Chris) v5: More comments in uapi (Tvrtko) Return query item errors in length field (All) v6: Tweak uapi comments style to match the coding style (Lionel) v7: Add i915_query.h (Joonas) v8: (Lionel) Change the behavior of the item iterator to report invalid queries into the query item rather than stopping the iteration. This enables userspace applications to query newer items on older kernels and only have failure on the items that are not supported. v9: Edit copyright headers (Joonas) v10: Typos & comments in uapi (Joonas) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Acked-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-6-lionel.g.landwerlin@intel.com
2018-03-06 12:28:56 +00:00
*/
__u32 flags;
#define DRM_I915_QUERY_PERF_CONFIG_LIST 1
#define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID 2
#define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID 3
drm/i915: add query uAPI There are a number of information that are readable from hardware registers and that we would like to make accessible to userspace. One particular example is the topology of the execution units (how are execution units grouped in subslices and slices and also which ones have been fused off for die recovery). At the moment the GET_PARAM ioctl covers some basic needs, but generally is only able to return a single value for each defined parameter. This is a bit problematic with topology descriptions which are array/maps of available units. This change introduces a new ioctl that can deal with requests to fill structures of potentially variable lengths. The user is expected fill a query with length fields set at 0 on the first call, the kernel then sets the length fields to the their expected values. A second call to the kernel with length fields at their expected values will trigger a copy of the data to the pointed memory locations. The scope of this uAPI is only to provide information to userspace, not to allow configuration of the device. v2: Simplify dispatcher code iteration (Tvrtko) Tweak uapi drm_i915_query_item structure (Tvrtko) v3: Rename pad fields into flags (Chris) Return error on flags field != 0 (Chris) Only copy length back to userspace in drm_i915_query_item (Chris) v4: Use array of functions instead of switch (Chris) v5: More comments in uapi (Tvrtko) Return query item errors in length field (All) v6: Tweak uapi comments style to match the coding style (Lionel) v7: Add i915_query.h (Joonas) v8: (Lionel) Change the behavior of the item iterator to report invalid queries into the query item rather than stopping the iteration. This enables userspace applications to query newer items on older kernels and only have failure on the items that are not supported. v9: Edit copyright headers (Joonas) v10: Typos & comments in uapi (Joonas) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Acked-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-6-lionel.g.landwerlin@intel.com
2018-03-06 12:28:56 +00:00
/**
* @data_ptr:
*
* Data will be written at the location pointed by @data_ptr when the
* value of @length matches the length of the data to be written by the
drm/i915: add query uAPI There are a number of information that are readable from hardware registers and that we would like to make accessible to userspace. One particular example is the topology of the execution units (how are execution units grouped in subslices and slices and also which ones have been fused off for die recovery). At the moment the GET_PARAM ioctl covers some basic needs, but generally is only able to return a single value for each defined parameter. This is a bit problematic with topology descriptions which are array/maps of available units. This change introduces a new ioctl that can deal with requests to fill structures of potentially variable lengths. The user is expected fill a query with length fields set at 0 on the first call, the kernel then sets the length fields to the their expected values. A second call to the kernel with length fields at their expected values will trigger a copy of the data to the pointed memory locations. The scope of this uAPI is only to provide information to userspace, not to allow configuration of the device. v2: Simplify dispatcher code iteration (Tvrtko) Tweak uapi drm_i915_query_item structure (Tvrtko) v3: Rename pad fields into flags (Chris) Return error on flags field != 0 (Chris) Only copy length back to userspace in drm_i915_query_item (Chris) v4: Use array of functions instead of switch (Chris) v5: More comments in uapi (Tvrtko) Return query item errors in length field (All) v6: Tweak uapi comments style to match the coding style (Lionel) v7: Add i915_query.h (Joonas) v8: (Lionel) Change the behavior of the item iterator to report invalid queries into the query item rather than stopping the iteration. This enables userspace applications to query newer items on older kernels and only have failure on the items that are not supported. v9: Edit copyright headers (Joonas) v10: Typos & comments in uapi (Joonas) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Acked-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-6-lionel.g.landwerlin@intel.com
2018-03-06 12:28:56 +00:00
* kernel.
*/
__u64 data_ptr;
};
/**
* struct drm_i915_query - Supply an array of struct drm_i915_query_item for the
* kernel to fill out.
*
* Note that this is generally a two step process for each struct
* drm_i915_query_item in the array:
*
* 1. Call the DRM_IOCTL_I915_QUERY, giving it our array of struct
* drm_i915_query_item, with &drm_i915_query_item.length set to zero. The
* kernel will then fill in the size, in bytes, which tells userspace how
* memory it needs to allocate for the blob(say for an array of properties).
*
* 2. Next we call DRM_IOCTL_I915_QUERY again, this time with the
* &drm_i915_query_item.data_ptr equal to our newly allocated blob. Note that
* the &drm_i915_query_item.length should still be the same as what the
* kernel previously set. At this point the kernel can fill in the blob.
*
* Note that for some query items it can make sense for userspace to just pass
* in a buffer/blob equal to or larger than the required size. In this case only
* a single ioctl call is needed. For some smaller query items this can work
* quite well.
*
*/
drm/i915: add query uAPI There are a number of information that are readable from hardware registers and that we would like to make accessible to userspace. One particular example is the topology of the execution units (how are execution units grouped in subslices and slices and also which ones have been fused off for die recovery). At the moment the GET_PARAM ioctl covers some basic needs, but generally is only able to return a single value for each defined parameter. This is a bit problematic with topology descriptions which are array/maps of available units. This change introduces a new ioctl that can deal with requests to fill structures of potentially variable lengths. The user is expected fill a query with length fields set at 0 on the first call, the kernel then sets the length fields to the their expected values. A second call to the kernel with length fields at their expected values will trigger a copy of the data to the pointed memory locations. The scope of this uAPI is only to provide information to userspace, not to allow configuration of the device. v2: Simplify dispatcher code iteration (Tvrtko) Tweak uapi drm_i915_query_item structure (Tvrtko) v3: Rename pad fields into flags (Chris) Return error on flags field != 0 (Chris) Only copy length back to userspace in drm_i915_query_item (Chris) v4: Use array of functions instead of switch (Chris) v5: More comments in uapi (Tvrtko) Return query item errors in length field (All) v6: Tweak uapi comments style to match the coding style (Lionel) v7: Add i915_query.h (Joonas) v8: (Lionel) Change the behavior of the item iterator to report invalid queries into the query item rather than stopping the iteration. This enables userspace applications to query newer items on older kernels and only have failure on the items that are not supported. v9: Edit copyright headers (Joonas) v10: Typos & comments in uapi (Joonas) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Acked-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-6-lionel.g.landwerlin@intel.com
2018-03-06 12:28:56 +00:00
struct drm_i915_query {
/** @num_items: The number of elements in the @items_ptr array */
drm/i915: add query uAPI There are a number of information that are readable from hardware registers and that we would like to make accessible to userspace. One particular example is the topology of the execution units (how are execution units grouped in subslices and slices and also which ones have been fused off for die recovery). At the moment the GET_PARAM ioctl covers some basic needs, but generally is only able to return a single value for each defined parameter. This is a bit problematic with topology descriptions which are array/maps of available units. This change introduces a new ioctl that can deal with requests to fill structures of potentially variable lengths. The user is expected fill a query with length fields set at 0 on the first call, the kernel then sets the length fields to the their expected values. A second call to the kernel with length fields at their expected values will trigger a copy of the data to the pointed memory locations. The scope of this uAPI is only to provide information to userspace, not to allow configuration of the device. v2: Simplify dispatcher code iteration (Tvrtko) Tweak uapi drm_i915_query_item structure (Tvrtko) v3: Rename pad fields into flags (Chris) Return error on flags field != 0 (Chris) Only copy length back to userspace in drm_i915_query_item (Chris) v4: Use array of functions instead of switch (Chris) v5: More comments in uapi (Tvrtko) Return query item errors in length field (All) v6: Tweak uapi comments style to match the coding style (Lionel) v7: Add i915_query.h (Joonas) v8: (Lionel) Change the behavior of the item iterator to report invalid queries into the query item rather than stopping the iteration. This enables userspace applications to query newer items on older kernels and only have failure on the items that are not supported. v9: Edit copyright headers (Joonas) v10: Typos & comments in uapi (Joonas) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Acked-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-6-lionel.g.landwerlin@intel.com
2018-03-06 12:28:56 +00:00
__u32 num_items;
/**
* @flags: Unused for now. Must be cleared to zero.
drm/i915: add query uAPI There are a number of information that are readable from hardware registers and that we would like to make accessible to userspace. One particular example is the topology of the execution units (how are execution units grouped in subslices and slices and also which ones have been fused off for die recovery). At the moment the GET_PARAM ioctl covers some basic needs, but generally is only able to return a single value for each defined parameter. This is a bit problematic with topology descriptions which are array/maps of available units. This change introduces a new ioctl that can deal with requests to fill structures of potentially variable lengths. The user is expected fill a query with length fields set at 0 on the first call, the kernel then sets the length fields to the their expected values. A second call to the kernel with length fields at their expected values will trigger a copy of the data to the pointed memory locations. The scope of this uAPI is only to provide information to userspace, not to allow configuration of the device. v2: Simplify dispatcher code iteration (Tvrtko) Tweak uapi drm_i915_query_item structure (Tvrtko) v3: Rename pad fields into flags (Chris) Return error on flags field != 0 (Chris) Only copy length back to userspace in drm_i915_query_item (Chris) v4: Use array of functions instead of switch (Chris) v5: More comments in uapi (Tvrtko) Return query item errors in length field (All) v6: Tweak uapi comments style to match the coding style (Lionel) v7: Add i915_query.h (Joonas) v8: (Lionel) Change the behavior of the item iterator to report invalid queries into the query item rather than stopping the iteration. This enables userspace applications to query newer items on older kernels and only have failure on the items that are not supported. v9: Edit copyright headers (Joonas) v10: Typos & comments in uapi (Joonas) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Acked-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-6-lionel.g.landwerlin@intel.com
2018-03-06 12:28:56 +00:00
*/
__u32 flags;
/**
* @items_ptr:
*
* Pointer to an array of struct drm_i915_query_item. The number of
* array elements is @num_items.
drm/i915: add query uAPI There are a number of information that are readable from hardware registers and that we would like to make accessible to userspace. One particular example is the topology of the execution units (how are execution units grouped in subslices and slices and also which ones have been fused off for die recovery). At the moment the GET_PARAM ioctl covers some basic needs, but generally is only able to return a single value for each defined parameter. This is a bit problematic with topology descriptions which are array/maps of available units. This change introduces a new ioctl that can deal with requests to fill structures of potentially variable lengths. The user is expected fill a query with length fields set at 0 on the first call, the kernel then sets the length fields to the their expected values. A second call to the kernel with length fields at their expected values will trigger a copy of the data to the pointed memory locations. The scope of this uAPI is only to provide information to userspace, not to allow configuration of the device. v2: Simplify dispatcher code iteration (Tvrtko) Tweak uapi drm_i915_query_item structure (Tvrtko) v3: Rename pad fields into flags (Chris) Return error on flags field != 0 (Chris) Only copy length back to userspace in drm_i915_query_item (Chris) v4: Use array of functions instead of switch (Chris) v5: More comments in uapi (Tvrtko) Return query item errors in length field (All) v6: Tweak uapi comments style to match the coding style (Lionel) v7: Add i915_query.h (Joonas) v8: (Lionel) Change the behavior of the item iterator to report invalid queries into the query item rather than stopping the iteration. This enables userspace applications to query newer items on older kernels and only have failure on the items that are not supported. v9: Edit copyright headers (Joonas) v10: Typos & comments in uapi (Joonas) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Acked-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-6-lionel.g.landwerlin@intel.com
2018-03-06 12:28:56 +00:00
*/
__u64 items_ptr;
};
/**
* struct drm_i915_query_topology_info
drm/i915: expose rcs topology through query uAPI With the introduction of asymmetric slices in CNL, we cannot rely on the previous SUBSLICE_MASK getparam to tell userspace what subslices are available. Here we introduce a more detailed way of querying the Gen's GPU topology that doesn't aggregate numbers. This is essential for monitoring parts of the GPU with the OA unit, because counters need to be normalized to the number of EUs/subslices/slices. The current aggregated numbers like EU_TOTAL do not gives us sufficient information. The Mesa series making use of this API is : https://patchwork.freedesktop.org/series/38795/ As a bonus we can draw representations of the GPU : https://imgur.com/a/vuqpa v2: Rename uapi struct s/_mask/_info/ (Tvrtko) Report max_slice/subslice/eus_per_subslice rather than strides (Tvrtko) Add uapi macros to read data from *_info structs (Tvrtko) v3: Use !!(v & DRM_I915_BIT()) for uapi macros instead of custom shifts (Tvrtko) v4: factorize query item writting (Tvrtko) tweak uapi struct/define names (Tvrtko) v5: Replace ALIGN() macro (Chris) v6: Updated uapi comments (Tvrtko) Moved flags != 0 checks into vfuncs (Tvrtko) v7: Use access_ok() before copying anything, to avoid overflows (Chris) Switch BUG_ON() to GEM_WARN_ON() (Tvrtko) v8: Tweak uapi comments style to match the coding style (Lionel) v9: Fix error in comment about computation of enabled subslice (Tvrtko) v10: Fix/update comments in uAPI (Sagar) v11: Drop drm_i915_query_(slice|subslice|eu)_info in favor of a single drm_i915_query_topology_info (Joonas) v12: Add subslice_stride/eu_stride in drm_i915_query_topology_info (Joonas) v13: Fix comment in uAPI (Joonas) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Acked-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-7-lionel.g.landwerlin@intel.com
2018-03-06 12:28:57 +00:00
*
* Describes slice/subslice/EU information queried by
* %DRM_I915_QUERY_TOPOLOGY_INFO
drm/i915: expose rcs topology through query uAPI With the introduction of asymmetric slices in CNL, we cannot rely on the previous SUBSLICE_MASK getparam to tell userspace what subslices are available. Here we introduce a more detailed way of querying the Gen's GPU topology that doesn't aggregate numbers. This is essential for monitoring parts of the GPU with the OA unit, because counters need to be normalized to the number of EUs/subslices/slices. The current aggregated numbers like EU_TOTAL do not gives us sufficient information. The Mesa series making use of this API is : https://patchwork.freedesktop.org/series/38795/ As a bonus we can draw representations of the GPU : https://imgur.com/a/vuqpa v2: Rename uapi struct s/_mask/_info/ (Tvrtko) Report max_slice/subslice/eus_per_subslice rather than strides (Tvrtko) Add uapi macros to read data from *_info structs (Tvrtko) v3: Use !!(v & DRM_I915_BIT()) for uapi macros instead of custom shifts (Tvrtko) v4: factorize query item writting (Tvrtko) tweak uapi struct/define names (Tvrtko) v5: Replace ALIGN() macro (Chris) v6: Updated uapi comments (Tvrtko) Moved flags != 0 checks into vfuncs (Tvrtko) v7: Use access_ok() before copying anything, to avoid overflows (Chris) Switch BUG_ON() to GEM_WARN_ON() (Tvrtko) v8: Tweak uapi comments style to match the coding style (Lionel) v9: Fix error in comment about computation of enabled subslice (Tvrtko) v10: Fix/update comments in uAPI (Sagar) v11: Drop drm_i915_query_(slice|subslice|eu)_info in favor of a single drm_i915_query_topology_info (Joonas) v12: Add subslice_stride/eu_stride in drm_i915_query_topology_info (Joonas) v13: Fix comment in uAPI (Joonas) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Acked-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-7-lionel.g.landwerlin@intel.com
2018-03-06 12:28:57 +00:00
*/
struct drm_i915_query_topology_info {
/**
* @flags:
*
drm/i915: expose rcs topology through query uAPI With the introduction of asymmetric slices in CNL, we cannot rely on the previous SUBSLICE_MASK getparam to tell userspace what subslices are available. Here we introduce a more detailed way of querying the Gen's GPU topology that doesn't aggregate numbers. This is essential for monitoring parts of the GPU with the OA unit, because counters need to be normalized to the number of EUs/subslices/slices. The current aggregated numbers like EU_TOTAL do not gives us sufficient information. The Mesa series making use of this API is : https://patchwork.freedesktop.org/series/38795/ As a bonus we can draw representations of the GPU : https://imgur.com/a/vuqpa v2: Rename uapi struct s/_mask/_info/ (Tvrtko) Report max_slice/subslice/eus_per_subslice rather than strides (Tvrtko) Add uapi macros to read data from *_info structs (Tvrtko) v3: Use !!(v & DRM_I915_BIT()) for uapi macros instead of custom shifts (Tvrtko) v4: factorize query item writting (Tvrtko) tweak uapi struct/define names (Tvrtko) v5: Replace ALIGN() macro (Chris) v6: Updated uapi comments (Tvrtko) Moved flags != 0 checks into vfuncs (Tvrtko) v7: Use access_ok() before copying anything, to avoid overflows (Chris) Switch BUG_ON() to GEM_WARN_ON() (Tvrtko) v8: Tweak uapi comments style to match the coding style (Lionel) v9: Fix error in comment about computation of enabled subslice (Tvrtko) v10: Fix/update comments in uAPI (Sagar) v11: Drop drm_i915_query_(slice|subslice|eu)_info in favor of a single drm_i915_query_topology_info (Joonas) v12: Add subslice_stride/eu_stride in drm_i915_query_topology_info (Joonas) v13: Fix comment in uAPI (Joonas) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Acked-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-7-lionel.g.landwerlin@intel.com
2018-03-06 12:28:57 +00:00
* Unused for now. Must be cleared to zero.
*/
__u16 flags;
/**
* @max_slices:
*
* The number of bits used to express the slice mask.
*/
drm/i915: expose rcs topology through query uAPI With the introduction of asymmetric slices in CNL, we cannot rely on the previous SUBSLICE_MASK getparam to tell userspace what subslices are available. Here we introduce a more detailed way of querying the Gen's GPU topology that doesn't aggregate numbers. This is essential for monitoring parts of the GPU with the OA unit, because counters need to be normalized to the number of EUs/subslices/slices. The current aggregated numbers like EU_TOTAL do not gives us sufficient information. The Mesa series making use of this API is : https://patchwork.freedesktop.org/series/38795/ As a bonus we can draw representations of the GPU : https://imgur.com/a/vuqpa v2: Rename uapi struct s/_mask/_info/ (Tvrtko) Report max_slice/subslice/eus_per_subslice rather than strides (Tvrtko) Add uapi macros to read data from *_info structs (Tvrtko) v3: Use !!(v & DRM_I915_BIT()) for uapi macros instead of custom shifts (Tvrtko) v4: factorize query item writting (Tvrtko) tweak uapi struct/define names (Tvrtko) v5: Replace ALIGN() macro (Chris) v6: Updated uapi comments (Tvrtko) Moved flags != 0 checks into vfuncs (Tvrtko) v7: Use access_ok() before copying anything, to avoid overflows (Chris) Switch BUG_ON() to GEM_WARN_ON() (Tvrtko) v8: Tweak uapi comments style to match the coding style (Lionel) v9: Fix error in comment about computation of enabled subslice (Tvrtko) v10: Fix/update comments in uAPI (Sagar) v11: Drop drm_i915_query_(slice|subslice|eu)_info in favor of a single drm_i915_query_topology_info (Joonas) v12: Add subslice_stride/eu_stride in drm_i915_query_topology_info (Joonas) v13: Fix comment in uAPI (Joonas) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Acked-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-7-lionel.g.landwerlin@intel.com
2018-03-06 12:28:57 +00:00
__u16 max_slices;
/**
* @max_subslices:
*
* The number of bits used to express the subslice mask.
*/
drm/i915: expose rcs topology through query uAPI With the introduction of asymmetric slices in CNL, we cannot rely on the previous SUBSLICE_MASK getparam to tell userspace what subslices are available. Here we introduce a more detailed way of querying the Gen's GPU topology that doesn't aggregate numbers. This is essential for monitoring parts of the GPU with the OA unit, because counters need to be normalized to the number of EUs/subslices/slices. The current aggregated numbers like EU_TOTAL do not gives us sufficient information. The Mesa series making use of this API is : https://patchwork.freedesktop.org/series/38795/ As a bonus we can draw representations of the GPU : https://imgur.com/a/vuqpa v2: Rename uapi struct s/_mask/_info/ (Tvrtko) Report max_slice/subslice/eus_per_subslice rather than strides (Tvrtko) Add uapi macros to read data from *_info structs (Tvrtko) v3: Use !!(v & DRM_I915_BIT()) for uapi macros instead of custom shifts (Tvrtko) v4: factorize query item writting (Tvrtko) tweak uapi struct/define names (Tvrtko) v5: Replace ALIGN() macro (Chris) v6: Updated uapi comments (Tvrtko) Moved flags != 0 checks into vfuncs (Tvrtko) v7: Use access_ok() before copying anything, to avoid overflows (Chris) Switch BUG_ON() to GEM_WARN_ON() (Tvrtko) v8: Tweak uapi comments style to match the coding style (Lionel) v9: Fix error in comment about computation of enabled subslice (Tvrtko) v10: Fix/update comments in uAPI (Sagar) v11: Drop drm_i915_query_(slice|subslice|eu)_info in favor of a single drm_i915_query_topology_info (Joonas) v12: Add subslice_stride/eu_stride in drm_i915_query_topology_info (Joonas) v13: Fix comment in uAPI (Joonas) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Acked-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-7-lionel.g.landwerlin@intel.com
2018-03-06 12:28:57 +00:00
__u16 max_subslices;
/**
* @max_eus_per_subslice:
*
* The number of bits in the EU mask that correspond to a single
* subslice's EUs.
*/
drm/i915: expose rcs topology through query uAPI With the introduction of asymmetric slices in CNL, we cannot rely on the previous SUBSLICE_MASK getparam to tell userspace what subslices are available. Here we introduce a more detailed way of querying the Gen's GPU topology that doesn't aggregate numbers. This is essential for monitoring parts of the GPU with the OA unit, because counters need to be normalized to the number of EUs/subslices/slices. The current aggregated numbers like EU_TOTAL do not gives us sufficient information. The Mesa series making use of this API is : https://patchwork.freedesktop.org/series/38795/ As a bonus we can draw representations of the GPU : https://imgur.com/a/vuqpa v2: Rename uapi struct s/_mask/_info/ (Tvrtko) Report max_slice/subslice/eus_per_subslice rather than strides (Tvrtko) Add uapi macros to read data from *_info structs (Tvrtko) v3: Use !!(v & DRM_I915_BIT()) for uapi macros instead of custom shifts (Tvrtko) v4: factorize query item writting (Tvrtko) tweak uapi struct/define names (Tvrtko) v5: Replace ALIGN() macro (Chris) v6: Updated uapi comments (Tvrtko) Moved flags != 0 checks into vfuncs (Tvrtko) v7: Use access_ok() before copying anything, to avoid overflows (Chris) Switch BUG_ON() to GEM_WARN_ON() (Tvrtko) v8: Tweak uapi comments style to match the coding style (Lionel) v9: Fix error in comment about computation of enabled subslice (Tvrtko) v10: Fix/update comments in uAPI (Sagar) v11: Drop drm_i915_query_(slice|subslice|eu)_info in favor of a single drm_i915_query_topology_info (Joonas) v12: Add subslice_stride/eu_stride in drm_i915_query_topology_info (Joonas) v13: Fix comment in uAPI (Joonas) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Acked-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-7-lionel.g.landwerlin@intel.com
2018-03-06 12:28:57 +00:00
__u16 max_eus_per_subslice;
/**
* @subslice_offset:
*
drm/i915: expose rcs topology through query uAPI With the introduction of asymmetric slices in CNL, we cannot rely on the previous SUBSLICE_MASK getparam to tell userspace what subslices are available. Here we introduce a more detailed way of querying the Gen's GPU topology that doesn't aggregate numbers. This is essential for monitoring parts of the GPU with the OA unit, because counters need to be normalized to the number of EUs/subslices/slices. The current aggregated numbers like EU_TOTAL do not gives us sufficient information. The Mesa series making use of this API is : https://patchwork.freedesktop.org/series/38795/ As a bonus we can draw representations of the GPU : https://imgur.com/a/vuqpa v2: Rename uapi struct s/_mask/_info/ (Tvrtko) Report max_slice/subslice/eus_per_subslice rather than strides (Tvrtko) Add uapi macros to read data from *_info structs (Tvrtko) v3: Use !!(v & DRM_I915_BIT()) for uapi macros instead of custom shifts (Tvrtko) v4: factorize query item writting (Tvrtko) tweak uapi struct/define names (Tvrtko) v5: Replace ALIGN() macro (Chris) v6: Updated uapi comments (Tvrtko) Moved flags != 0 checks into vfuncs (Tvrtko) v7: Use access_ok() before copying anything, to avoid overflows (Chris) Switch BUG_ON() to GEM_WARN_ON() (Tvrtko) v8: Tweak uapi comments style to match the coding style (Lionel) v9: Fix error in comment about computation of enabled subslice (Tvrtko) v10: Fix/update comments in uAPI (Sagar) v11: Drop drm_i915_query_(slice|subslice|eu)_info in favor of a single drm_i915_query_topology_info (Joonas) v12: Add subslice_stride/eu_stride in drm_i915_query_topology_info (Joonas) v13: Fix comment in uAPI (Joonas) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Acked-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-7-lionel.g.landwerlin@intel.com
2018-03-06 12:28:57 +00:00
* Offset in data[] at which the subslice masks are stored.
*/
__u16 subslice_offset;
/**
* @subslice_stride:
*
drm/i915: expose rcs topology through query uAPI With the introduction of asymmetric slices in CNL, we cannot rely on the previous SUBSLICE_MASK getparam to tell userspace what subslices are available. Here we introduce a more detailed way of querying the Gen's GPU topology that doesn't aggregate numbers. This is essential for monitoring parts of the GPU with the OA unit, because counters need to be normalized to the number of EUs/subslices/slices. The current aggregated numbers like EU_TOTAL do not gives us sufficient information. The Mesa series making use of this API is : https://patchwork.freedesktop.org/series/38795/ As a bonus we can draw representations of the GPU : https://imgur.com/a/vuqpa v2: Rename uapi struct s/_mask/_info/ (Tvrtko) Report max_slice/subslice/eus_per_subslice rather than strides (Tvrtko) Add uapi macros to read data from *_info structs (Tvrtko) v3: Use !!(v & DRM_I915_BIT()) for uapi macros instead of custom shifts (Tvrtko) v4: factorize query item writting (Tvrtko) tweak uapi struct/define names (Tvrtko) v5: Replace ALIGN() macro (Chris) v6: Updated uapi comments (Tvrtko) Moved flags != 0 checks into vfuncs (Tvrtko) v7: Use access_ok() before copying anything, to avoid overflows (Chris) Switch BUG_ON() to GEM_WARN_ON() (Tvrtko) v8: Tweak uapi comments style to match the coding style (Lionel) v9: Fix error in comment about computation of enabled subslice (Tvrtko) v10: Fix/update comments in uAPI (Sagar) v11: Drop drm_i915_query_(slice|subslice|eu)_info in favor of a single drm_i915_query_topology_info (Joonas) v12: Add subslice_stride/eu_stride in drm_i915_query_topology_info (Joonas) v13: Fix comment in uAPI (Joonas) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Acked-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-7-lionel.g.landwerlin@intel.com
2018-03-06 12:28:57 +00:00
* Stride at which each of the subslice masks for each slice are
* stored.
*/
__u16 subslice_stride;
/**
* @eu_offset:
*
drm/i915: expose rcs topology through query uAPI With the introduction of asymmetric slices in CNL, we cannot rely on the previous SUBSLICE_MASK getparam to tell userspace what subslices are available. Here we introduce a more detailed way of querying the Gen's GPU topology that doesn't aggregate numbers. This is essential for monitoring parts of the GPU with the OA unit, because counters need to be normalized to the number of EUs/subslices/slices. The current aggregated numbers like EU_TOTAL do not gives us sufficient information. The Mesa series making use of this API is : https://patchwork.freedesktop.org/series/38795/ As a bonus we can draw representations of the GPU : https://imgur.com/a/vuqpa v2: Rename uapi struct s/_mask/_info/ (Tvrtko) Report max_slice/subslice/eus_per_subslice rather than strides (Tvrtko) Add uapi macros to read data from *_info structs (Tvrtko) v3: Use !!(v & DRM_I915_BIT()) for uapi macros instead of custom shifts (Tvrtko) v4: factorize query item writting (Tvrtko) tweak uapi struct/define names (Tvrtko) v5: Replace ALIGN() macro (Chris) v6: Updated uapi comments (Tvrtko) Moved flags != 0 checks into vfuncs (Tvrtko) v7: Use access_ok() before copying anything, to avoid overflows (Chris) Switch BUG_ON() to GEM_WARN_ON() (Tvrtko) v8: Tweak uapi comments style to match the coding style (Lionel) v9: Fix error in comment about computation of enabled subslice (Tvrtko) v10: Fix/update comments in uAPI (Sagar) v11: Drop drm_i915_query_(slice|subslice|eu)_info in favor of a single drm_i915_query_topology_info (Joonas) v12: Add subslice_stride/eu_stride in drm_i915_query_topology_info (Joonas) v13: Fix comment in uAPI (Joonas) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Acked-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-7-lionel.g.landwerlin@intel.com
2018-03-06 12:28:57 +00:00
* Offset in data[] at which the EU masks are stored.
*/
__u16 eu_offset;
/**
* @eu_stride:
*
drm/i915: expose rcs topology through query uAPI With the introduction of asymmetric slices in CNL, we cannot rely on the previous SUBSLICE_MASK getparam to tell userspace what subslices are available. Here we introduce a more detailed way of querying the Gen's GPU topology that doesn't aggregate numbers. This is essential for monitoring parts of the GPU with the OA unit, because counters need to be normalized to the number of EUs/subslices/slices. The current aggregated numbers like EU_TOTAL do not gives us sufficient information. The Mesa series making use of this API is : https://patchwork.freedesktop.org/series/38795/ As a bonus we can draw representations of the GPU : https://imgur.com/a/vuqpa v2: Rename uapi struct s/_mask/_info/ (Tvrtko) Report max_slice/subslice/eus_per_subslice rather than strides (Tvrtko) Add uapi macros to read data from *_info structs (Tvrtko) v3: Use !!(v & DRM_I915_BIT()) for uapi macros instead of custom shifts (Tvrtko) v4: factorize query item writting (Tvrtko) tweak uapi struct/define names (Tvrtko) v5: Replace ALIGN() macro (Chris) v6: Updated uapi comments (Tvrtko) Moved flags != 0 checks into vfuncs (Tvrtko) v7: Use access_ok() before copying anything, to avoid overflows (Chris) Switch BUG_ON() to GEM_WARN_ON() (Tvrtko) v8: Tweak uapi comments style to match the coding style (Lionel) v9: Fix error in comment about computation of enabled subslice (Tvrtko) v10: Fix/update comments in uAPI (Sagar) v11: Drop drm_i915_query_(slice|subslice|eu)_info in favor of a single drm_i915_query_topology_info (Joonas) v12: Add subslice_stride/eu_stride in drm_i915_query_topology_info (Joonas) v13: Fix comment in uAPI (Joonas) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Acked-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-7-lionel.g.landwerlin@intel.com
2018-03-06 12:28:57 +00:00
* Stride at which each of the EU masks for each subslice are stored.
*/
__u16 eu_stride;
/**
* @data:
*
* Contains 3 pieces of information :
*
* - The slice mask with one bit per slice telling whether a slice is
* available. The availability of slice X can be queried with the
* following formula :
*
* .. code:: c
*
* (data[X / 8] >> (X % 8)) & 1
*
* Starting with Xe_HP platforms, Intel hardware no longer has
* traditional slices so i915 will always report a single slice
* (hardcoded slicemask = 0x1) which contains all of the platform's
* subslices. I.e., the mask here does not reflect any of the newer
* hardware concepts such as "gslices" or "cslices" since userspace
* is capable of inferring those from the subslice mask.
*
* - The subslice mask for each slice with one bit per subslice telling
* whether a subslice is available. Starting with Gen12 we use the
* term "subslice" to refer to what the hardware documentation
* describes as a "dual-subslices." The availability of subslice Y
* in slice X can be queried with the following formula :
*
* .. code:: c
*
* (data[subslice_offset + X * subslice_stride + Y / 8] >> (Y % 8)) & 1
*
* - The EU mask for each subslice in each slice, with one bit per EU
* telling whether an EU is available. The availability of EU Z in
* subslice Y in slice X can be queried with the following formula :
*
* .. code:: c
*
* (data[eu_offset +
* (X * max_subslices + Y) * eu_stride +
* Z / 8
* ] >> (Z % 8)) & 1
*/
drm/i915: expose rcs topology through query uAPI With the introduction of asymmetric slices in CNL, we cannot rely on the previous SUBSLICE_MASK getparam to tell userspace what subslices are available. Here we introduce a more detailed way of querying the Gen's GPU topology that doesn't aggregate numbers. This is essential for monitoring parts of the GPU with the OA unit, because counters need to be normalized to the number of EUs/subslices/slices. The current aggregated numbers like EU_TOTAL do not gives us sufficient information. The Mesa series making use of this API is : https://patchwork.freedesktop.org/series/38795/ As a bonus we can draw representations of the GPU : https://imgur.com/a/vuqpa v2: Rename uapi struct s/_mask/_info/ (Tvrtko) Report max_slice/subslice/eus_per_subslice rather than strides (Tvrtko) Add uapi macros to read data from *_info structs (Tvrtko) v3: Use !!(v & DRM_I915_BIT()) for uapi macros instead of custom shifts (Tvrtko) v4: factorize query item writting (Tvrtko) tweak uapi struct/define names (Tvrtko) v5: Replace ALIGN() macro (Chris) v6: Updated uapi comments (Tvrtko) Moved flags != 0 checks into vfuncs (Tvrtko) v7: Use access_ok() before copying anything, to avoid overflows (Chris) Switch BUG_ON() to GEM_WARN_ON() (Tvrtko) v8: Tweak uapi comments style to match the coding style (Lionel) v9: Fix error in comment about computation of enabled subslice (Tvrtko) v10: Fix/update comments in uAPI (Sagar) v11: Drop drm_i915_query_(slice|subslice|eu)_info in favor of a single drm_i915_query_topology_info (Joonas) v12: Add subslice_stride/eu_stride in drm_i915_query_topology_info (Joonas) v13: Fix comment in uAPI (Joonas) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Acked-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-7-lionel.g.landwerlin@intel.com
2018-03-06 12:28:57 +00:00
__u8 data[];
};
/**
* DOC: Engine Discovery uAPI
*
* Engine discovery uAPI is a way of enumerating physical engines present in a
* GPU associated with an open i915 DRM file descriptor. This supersedes the old
* way of using `DRM_IOCTL_I915_GETPARAM` and engine identifiers like
* `I915_PARAM_HAS_BLT`.
*
* The need for this interface came starting with Icelake and newer GPUs, which
* started to establish a pattern of having multiple engines of a same class,
* where not all instances were always completely functionally equivalent.
*
* Entry point for this uapi is `DRM_IOCTL_I915_QUERY` with the
* `DRM_I915_QUERY_ENGINE_INFO` as the queried item id.
*
* Example for getting the list of engines:
*
* .. code-block:: C
*
* struct drm_i915_query_engine_info *info;
* struct drm_i915_query_item item = {
* .query_id = DRM_I915_QUERY_ENGINE_INFO;
* };
* struct drm_i915_query query = {
* .num_items = 1,
* .items_ptr = (uintptr_t)&item,
* };
* int err, i;
*
* // First query the size of the blob we need, this needs to be large
* // enough to hold our array of engines. The kernel will fill out the
* // item.length for us, which is the number of bytes we need.
* //
* // Alternatively a large buffer can be allocated straightaway enabling
* // querying in one pass, in which case item.length should contain the
* // length of the provided buffer.
* err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
* if (err) ...
*
* info = calloc(1, item.length);
* // Now that we allocated the required number of bytes, we call the ioctl
* // again, this time with the data_ptr pointing to our newly allocated
* // blob, which the kernel can then populate with info on all engines.
* item.data_ptr = (uintptr_t)&info;
*
* err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
* if (err) ...
*
* // We can now access each engine in the array
* for (i = 0; i < info->num_engines; i++) {
* struct drm_i915_engine_info einfo = info->engines[i];
* u16 class = einfo.engine.class;
* u16 instance = einfo.engine.instance;
* ....
* }
*
* free(info);
*
* Each of the enumerated engines, apart from being defined by its class and
* instance (see `struct i915_engine_class_instance`), also can have flags and
* capabilities defined as documented in i915_drm.h.
*
* For instance video engines which support HEVC encoding will have the
* `I915_VIDEO_CLASS_CAPABILITY_HEVC` capability bit set.
*
* Engine discovery only fully comes to its own when combined with the new way
* of addressing engines when submitting batch buffers using contexts with
* engine maps configured.
*/
drm/i915: Engine discovery query Engine discovery query allows userspace to enumerate engines, probe their configuration features, all without needing to maintain the internal PCI ID based database. A new query for the generic i915 query ioctl is added named DRM_I915_QUERY_ENGINE_INFO, together with accompanying structure drm_i915_query_engine_info. The address of latter should be passed to the kernel in the query.data_ptr field, and should be large enough for the kernel to fill out all known engines as struct drm_i915_engine_info elements trailing the query. As with other queries, setting the item query length to zero allows userspace to query minimum required buffer size. Enumerated engines have common type mask which can be used to query all hardware engines, versus engines userspace can submit to using the execbuf uAPI. Engines also have capabilities which are per engine class namespace of bits describing features not present on all engine instances. v2: * Fixed HEVC assignment. * Reorder some fields, rename type to flags, increase width. (Lionel) * No need to allocate temporary storage if we do it engine by engine. (Lionel) v3: * Describe engine flags and mark mbz fields. (Lionel) * HEVC only applies to VCS. v4: * Squash SFC flag into main patch. * Tidy some comments. v5: * Add uabi_ prefix to engine capabilities. (Chris Wilson) * Report exact size of engine info array. (Chris Wilson) * Drop the engine flags. (Joonas Lahtinen) * Added some more reserved fields. * Move flags after class/instance. v6: * Do not check engine info array was zeroed by userspace but zero the unused fields for them instead. v7: * Simplify length calculation loop. (Lionel Landwerlin) v8: * Remove MBZ comments where not applicable. * Rename ABI flags to match engine class define naming. * Rename SFC ABI flag to reflect it applies to VCS and VECS. * SFC is wired to even _logical_ engine instances. * SFC applies to VCS and VECS. * HEVC is present on all instances on Gen11. (Tony) * Simplify length calculation even more. (Chris Wilson) * Move info_ptr assigment closer to loop for clarity. (Chris Wilson) * Use vdbox_sfc_access from runtime info. * Rebase for RUNTIME_INFO. * Refactor for lower indentation. * Rename uAPI class/instance to engine_class/instance to avoid C++ keyword. v9: * Rebase for s/num_rings/num_engines/ in RUNTIME_INFO. v10: * Use new copy_query_item. v11: * Consolidate with struct i915_engine_class_instnace. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Jon Bloomfield <jon.bloomfield@intel.com> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Tony Ye <tony.ye@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> # v7 Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20190522090054.6007-1-tvrtko.ursulin@linux.intel.com
2019-05-22 09:00:54 +00:00
/**
* struct drm_i915_engine_info
*
* Describes one engine and its capabilities as known to the driver.
drm/i915: Engine discovery query Engine discovery query allows userspace to enumerate engines, probe their configuration features, all without needing to maintain the internal PCI ID based database. A new query for the generic i915 query ioctl is added named DRM_I915_QUERY_ENGINE_INFO, together with accompanying structure drm_i915_query_engine_info. The address of latter should be passed to the kernel in the query.data_ptr field, and should be large enough for the kernel to fill out all known engines as struct drm_i915_engine_info elements trailing the query. As with other queries, setting the item query length to zero allows userspace to query minimum required buffer size. Enumerated engines have common type mask which can be used to query all hardware engines, versus engines userspace can submit to using the execbuf uAPI. Engines also have capabilities which are per engine class namespace of bits describing features not present on all engine instances. v2: * Fixed HEVC assignment. * Reorder some fields, rename type to flags, increase width. (Lionel) * No need to allocate temporary storage if we do it engine by engine. (Lionel) v3: * Describe engine flags and mark mbz fields. (Lionel) * HEVC only applies to VCS. v4: * Squash SFC flag into main patch. * Tidy some comments. v5: * Add uabi_ prefix to engine capabilities. (Chris Wilson) * Report exact size of engine info array. (Chris Wilson) * Drop the engine flags. (Joonas Lahtinen) * Added some more reserved fields. * Move flags after class/instance. v6: * Do not check engine info array was zeroed by userspace but zero the unused fields for them instead. v7: * Simplify length calculation loop. (Lionel Landwerlin) v8: * Remove MBZ comments where not applicable. * Rename ABI flags to match engine class define naming. * Rename SFC ABI flag to reflect it applies to VCS and VECS. * SFC is wired to even _logical_ engine instances. * SFC applies to VCS and VECS. * HEVC is present on all instances on Gen11. (Tony) * Simplify length calculation even more. (Chris Wilson) * Move info_ptr assigment closer to loop for clarity. (Chris Wilson) * Use vdbox_sfc_access from runtime info. * Rebase for RUNTIME_INFO. * Refactor for lower indentation. * Rename uAPI class/instance to engine_class/instance to avoid C++ keyword. v9: * Rebase for s/num_rings/num_engines/ in RUNTIME_INFO. v10: * Use new copy_query_item. v11: * Consolidate with struct i915_engine_class_instnace. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Jon Bloomfield <jon.bloomfield@intel.com> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Tony Ye <tony.ye@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> # v7 Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20190522090054.6007-1-tvrtko.ursulin@linux.intel.com
2019-05-22 09:00:54 +00:00
*/
struct drm_i915_engine_info {
/** @engine: Engine class and instance. */
drm/i915: Engine discovery query Engine discovery query allows userspace to enumerate engines, probe their configuration features, all without needing to maintain the internal PCI ID based database. A new query for the generic i915 query ioctl is added named DRM_I915_QUERY_ENGINE_INFO, together with accompanying structure drm_i915_query_engine_info. The address of latter should be passed to the kernel in the query.data_ptr field, and should be large enough for the kernel to fill out all known engines as struct drm_i915_engine_info elements trailing the query. As with other queries, setting the item query length to zero allows userspace to query minimum required buffer size. Enumerated engines have common type mask which can be used to query all hardware engines, versus engines userspace can submit to using the execbuf uAPI. Engines also have capabilities which are per engine class namespace of bits describing features not present on all engine instances. v2: * Fixed HEVC assignment. * Reorder some fields, rename type to flags, increase width. (Lionel) * No need to allocate temporary storage if we do it engine by engine. (Lionel) v3: * Describe engine flags and mark mbz fields. (Lionel) * HEVC only applies to VCS. v4: * Squash SFC flag into main patch. * Tidy some comments. v5: * Add uabi_ prefix to engine capabilities. (Chris Wilson) * Report exact size of engine info array. (Chris Wilson) * Drop the engine flags. (Joonas Lahtinen) * Added some more reserved fields. * Move flags after class/instance. v6: * Do not check engine info array was zeroed by userspace but zero the unused fields for them instead. v7: * Simplify length calculation loop. (Lionel Landwerlin) v8: * Remove MBZ comments where not applicable. * Rename ABI flags to match engine class define naming. * Rename SFC ABI flag to reflect it applies to VCS and VECS. * SFC is wired to even _logical_ engine instances. * SFC applies to VCS and VECS. * HEVC is present on all instances on Gen11. (Tony) * Simplify length calculation even more. (Chris Wilson) * Move info_ptr assigment closer to loop for clarity. (Chris Wilson) * Use vdbox_sfc_access from runtime info. * Rebase for RUNTIME_INFO. * Refactor for lower indentation. * Rename uAPI class/instance to engine_class/instance to avoid C++ keyword. v9: * Rebase for s/num_rings/num_engines/ in RUNTIME_INFO. v10: * Use new copy_query_item. v11: * Consolidate with struct i915_engine_class_instnace. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Jon Bloomfield <jon.bloomfield@intel.com> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Tony Ye <tony.ye@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> # v7 Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20190522090054.6007-1-tvrtko.ursulin@linux.intel.com
2019-05-22 09:00:54 +00:00
struct i915_engine_class_instance engine;
/** @rsvd0: Reserved field. */
drm/i915: Engine discovery query Engine discovery query allows userspace to enumerate engines, probe their configuration features, all without needing to maintain the internal PCI ID based database. A new query for the generic i915 query ioctl is added named DRM_I915_QUERY_ENGINE_INFO, together with accompanying structure drm_i915_query_engine_info. The address of latter should be passed to the kernel in the query.data_ptr field, and should be large enough for the kernel to fill out all known engines as struct drm_i915_engine_info elements trailing the query. As with other queries, setting the item query length to zero allows userspace to query minimum required buffer size. Enumerated engines have common type mask which can be used to query all hardware engines, versus engines userspace can submit to using the execbuf uAPI. Engines also have capabilities which are per engine class namespace of bits describing features not present on all engine instances. v2: * Fixed HEVC assignment. * Reorder some fields, rename type to flags, increase width. (Lionel) * No need to allocate temporary storage if we do it engine by engine. (Lionel) v3: * Describe engine flags and mark mbz fields. (Lionel) * HEVC only applies to VCS. v4: * Squash SFC flag into main patch. * Tidy some comments. v5: * Add uabi_ prefix to engine capabilities. (Chris Wilson) * Report exact size of engine info array. (Chris Wilson) * Drop the engine flags. (Joonas Lahtinen) * Added some more reserved fields. * Move flags after class/instance. v6: * Do not check engine info array was zeroed by userspace but zero the unused fields for them instead. v7: * Simplify length calculation loop. (Lionel Landwerlin) v8: * Remove MBZ comments where not applicable. * Rename ABI flags to match engine class define naming. * Rename SFC ABI flag to reflect it applies to VCS and VECS. * SFC is wired to even _logical_ engine instances. * SFC applies to VCS and VECS. * HEVC is present on all instances on Gen11. (Tony) * Simplify length calculation even more. (Chris Wilson) * Move info_ptr assigment closer to loop for clarity. (Chris Wilson) * Use vdbox_sfc_access from runtime info. * Rebase for RUNTIME_INFO. * Refactor for lower indentation. * Rename uAPI class/instance to engine_class/instance to avoid C++ keyword. v9: * Rebase for s/num_rings/num_engines/ in RUNTIME_INFO. v10: * Use new copy_query_item. v11: * Consolidate with struct i915_engine_class_instnace. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Jon Bloomfield <jon.bloomfield@intel.com> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Tony Ye <tony.ye@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> # v7 Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20190522090054.6007-1-tvrtko.ursulin@linux.intel.com
2019-05-22 09:00:54 +00:00
__u32 rsvd0;
/** @flags: Engine flags. */
drm/i915: Engine discovery query Engine discovery query allows userspace to enumerate engines, probe their configuration features, all without needing to maintain the internal PCI ID based database. A new query for the generic i915 query ioctl is added named DRM_I915_QUERY_ENGINE_INFO, together with accompanying structure drm_i915_query_engine_info. The address of latter should be passed to the kernel in the query.data_ptr field, and should be large enough for the kernel to fill out all known engines as struct drm_i915_engine_info elements trailing the query. As with other queries, setting the item query length to zero allows userspace to query minimum required buffer size. Enumerated engines have common type mask which can be used to query all hardware engines, versus engines userspace can submit to using the execbuf uAPI. Engines also have capabilities which are per engine class namespace of bits describing features not present on all engine instances. v2: * Fixed HEVC assignment. * Reorder some fields, rename type to flags, increase width. (Lionel) * No need to allocate temporary storage if we do it engine by engine. (Lionel) v3: * Describe engine flags and mark mbz fields. (Lionel) * HEVC only applies to VCS. v4: * Squash SFC flag into main patch. * Tidy some comments. v5: * Add uabi_ prefix to engine capabilities. (Chris Wilson) * Report exact size of engine info array. (Chris Wilson) * Drop the engine flags. (Joonas Lahtinen) * Added some more reserved fields. * Move flags after class/instance. v6: * Do not check engine info array was zeroed by userspace but zero the unused fields for them instead. v7: * Simplify length calculation loop. (Lionel Landwerlin) v8: * Remove MBZ comments where not applicable. * Rename ABI flags to match engine class define naming. * Rename SFC ABI flag to reflect it applies to VCS and VECS. * SFC is wired to even _logical_ engine instances. * SFC applies to VCS and VECS. * HEVC is present on all instances on Gen11. (Tony) * Simplify length calculation even more. (Chris Wilson) * Move info_ptr assigment closer to loop for clarity. (Chris Wilson) * Use vdbox_sfc_access from runtime info. * Rebase for RUNTIME_INFO. * Refactor for lower indentation. * Rename uAPI class/instance to engine_class/instance to avoid C++ keyword. v9: * Rebase for s/num_rings/num_engines/ in RUNTIME_INFO. v10: * Use new copy_query_item. v11: * Consolidate with struct i915_engine_class_instnace. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Jon Bloomfield <jon.bloomfield@intel.com> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Tony Ye <tony.ye@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> # v7 Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20190522090054.6007-1-tvrtko.ursulin@linux.intel.com
2019-05-22 09:00:54 +00:00
__u64 flags;
#define I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE (1 << 0)
drm/i915: Engine discovery query Engine discovery query allows userspace to enumerate engines, probe their configuration features, all without needing to maintain the internal PCI ID based database. A new query for the generic i915 query ioctl is added named DRM_I915_QUERY_ENGINE_INFO, together with accompanying structure drm_i915_query_engine_info. The address of latter should be passed to the kernel in the query.data_ptr field, and should be large enough for the kernel to fill out all known engines as struct drm_i915_engine_info elements trailing the query. As with other queries, setting the item query length to zero allows userspace to query minimum required buffer size. Enumerated engines have common type mask which can be used to query all hardware engines, versus engines userspace can submit to using the execbuf uAPI. Engines also have capabilities which are per engine class namespace of bits describing features not present on all engine instances. v2: * Fixed HEVC assignment. * Reorder some fields, rename type to flags, increase width. (Lionel) * No need to allocate temporary storage if we do it engine by engine. (Lionel) v3: * Describe engine flags and mark mbz fields. (Lionel) * HEVC only applies to VCS. v4: * Squash SFC flag into main patch. * Tidy some comments. v5: * Add uabi_ prefix to engine capabilities. (Chris Wilson) * Report exact size of engine info array. (Chris Wilson) * Drop the engine flags. (Joonas Lahtinen) * Added some more reserved fields. * Move flags after class/instance. v6: * Do not check engine info array was zeroed by userspace but zero the unused fields for them instead. v7: * Simplify length calculation loop. (Lionel Landwerlin) v8: * Remove MBZ comments where not applicable. * Rename ABI flags to match engine class define naming. * Rename SFC ABI flag to reflect it applies to VCS and VECS. * SFC is wired to even _logical_ engine instances. * SFC applies to VCS and VECS. * HEVC is present on all instances on Gen11. (Tony) * Simplify length calculation even more. (Chris Wilson) * Move info_ptr assigment closer to loop for clarity. (Chris Wilson) * Use vdbox_sfc_access from runtime info. * Rebase for RUNTIME_INFO. * Refactor for lower indentation. * Rename uAPI class/instance to engine_class/instance to avoid C++ keyword. v9: * Rebase for s/num_rings/num_engines/ in RUNTIME_INFO. v10: * Use new copy_query_item. v11: * Consolidate with struct i915_engine_class_instnace. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Jon Bloomfield <jon.bloomfield@intel.com> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Tony Ye <tony.ye@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> # v7 Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20190522090054.6007-1-tvrtko.ursulin@linux.intel.com
2019-05-22 09:00:54 +00:00
/** @capabilities: Capabilities of this engine. */
drm/i915: Engine discovery query Engine discovery query allows userspace to enumerate engines, probe their configuration features, all without needing to maintain the internal PCI ID based database. A new query for the generic i915 query ioctl is added named DRM_I915_QUERY_ENGINE_INFO, together with accompanying structure drm_i915_query_engine_info. The address of latter should be passed to the kernel in the query.data_ptr field, and should be large enough for the kernel to fill out all known engines as struct drm_i915_engine_info elements trailing the query. As with other queries, setting the item query length to zero allows userspace to query minimum required buffer size. Enumerated engines have common type mask which can be used to query all hardware engines, versus engines userspace can submit to using the execbuf uAPI. Engines also have capabilities which are per engine class namespace of bits describing features not present on all engine instances. v2: * Fixed HEVC assignment. * Reorder some fields, rename type to flags, increase width. (Lionel) * No need to allocate temporary storage if we do it engine by engine. (Lionel) v3: * Describe engine flags and mark mbz fields. (Lionel) * HEVC only applies to VCS. v4: * Squash SFC flag into main patch. * Tidy some comments. v5: * Add uabi_ prefix to engine capabilities. (Chris Wilson) * Report exact size of engine info array. (Chris Wilson) * Drop the engine flags. (Joonas Lahtinen) * Added some more reserved fields. * Move flags after class/instance. v6: * Do not check engine info array was zeroed by userspace but zero the unused fields for them instead. v7: * Simplify length calculation loop. (Lionel Landwerlin) v8: * Remove MBZ comments where not applicable. * Rename ABI flags to match engine class define naming. * Rename SFC ABI flag to reflect it applies to VCS and VECS. * SFC is wired to even _logical_ engine instances. * SFC applies to VCS and VECS. * HEVC is present on all instances on Gen11. (Tony) * Simplify length calculation even more. (Chris Wilson) * Move info_ptr assigment closer to loop for clarity. (Chris Wilson) * Use vdbox_sfc_access from runtime info. * Rebase for RUNTIME_INFO. * Refactor for lower indentation. * Rename uAPI class/instance to engine_class/instance to avoid C++ keyword. v9: * Rebase for s/num_rings/num_engines/ in RUNTIME_INFO. v10: * Use new copy_query_item. v11: * Consolidate with struct i915_engine_class_instnace. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Jon Bloomfield <jon.bloomfield@intel.com> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Tony Ye <tony.ye@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> # v7 Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20190522090054.6007-1-tvrtko.ursulin@linux.intel.com
2019-05-22 09:00:54 +00:00
__u64 capabilities;
#define I915_VIDEO_CLASS_CAPABILITY_HEVC (1 << 0)
#define I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC (1 << 1)
/** @logical_instance: Logical instance of engine */
__u16 logical_instance;
/** @rsvd1: Reserved fields. */
__u16 rsvd1[3];
/** @rsvd2: Reserved fields. */
__u64 rsvd2[3];
drm/i915: Engine discovery query Engine discovery query allows userspace to enumerate engines, probe their configuration features, all without needing to maintain the internal PCI ID based database. A new query for the generic i915 query ioctl is added named DRM_I915_QUERY_ENGINE_INFO, together with accompanying structure drm_i915_query_engine_info. The address of latter should be passed to the kernel in the query.data_ptr field, and should be large enough for the kernel to fill out all known engines as struct drm_i915_engine_info elements trailing the query. As with other queries, setting the item query length to zero allows userspace to query minimum required buffer size. Enumerated engines have common type mask which can be used to query all hardware engines, versus engines userspace can submit to using the execbuf uAPI. Engines also have capabilities which are per engine class namespace of bits describing features not present on all engine instances. v2: * Fixed HEVC assignment. * Reorder some fields, rename type to flags, increase width. (Lionel) * No need to allocate temporary storage if we do it engine by engine. (Lionel) v3: * Describe engine flags and mark mbz fields. (Lionel) * HEVC only applies to VCS. v4: * Squash SFC flag into main patch. * Tidy some comments. v5: * Add uabi_ prefix to engine capabilities. (Chris Wilson) * Report exact size of engine info array. (Chris Wilson) * Drop the engine flags. (Joonas Lahtinen) * Added some more reserved fields. * Move flags after class/instance. v6: * Do not check engine info array was zeroed by userspace but zero the unused fields for them instead. v7: * Simplify length calculation loop. (Lionel Landwerlin) v8: * Remove MBZ comments where not applicable. * Rename ABI flags to match engine class define naming. * Rename SFC ABI flag to reflect it applies to VCS and VECS. * SFC is wired to even _logical_ engine instances. * SFC applies to VCS and VECS. * HEVC is present on all instances on Gen11. (Tony) * Simplify length calculation even more. (Chris Wilson) * Move info_ptr assigment closer to loop for clarity. (Chris Wilson) * Use vdbox_sfc_access from runtime info. * Rebase for RUNTIME_INFO. * Refactor for lower indentation. * Rename uAPI class/instance to engine_class/instance to avoid C++ keyword. v9: * Rebase for s/num_rings/num_engines/ in RUNTIME_INFO. v10: * Use new copy_query_item. v11: * Consolidate with struct i915_engine_class_instnace. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Jon Bloomfield <jon.bloomfield@intel.com> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Tony Ye <tony.ye@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> # v7 Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20190522090054.6007-1-tvrtko.ursulin@linux.intel.com
2019-05-22 09:00:54 +00:00
};
/**
* struct drm_i915_query_engine_info
*
* Engine info query enumerates all engines known to the driver by filling in
* an array of struct drm_i915_engine_info structures.
*/
struct drm_i915_query_engine_info {
/** @num_engines: Number of struct drm_i915_engine_info structs following. */
drm/i915: Engine discovery query Engine discovery query allows userspace to enumerate engines, probe their configuration features, all without needing to maintain the internal PCI ID based database. A new query for the generic i915 query ioctl is added named DRM_I915_QUERY_ENGINE_INFO, together with accompanying structure drm_i915_query_engine_info. The address of latter should be passed to the kernel in the query.data_ptr field, and should be large enough for the kernel to fill out all known engines as struct drm_i915_engine_info elements trailing the query. As with other queries, setting the item query length to zero allows userspace to query minimum required buffer size. Enumerated engines have common type mask which can be used to query all hardware engines, versus engines userspace can submit to using the execbuf uAPI. Engines also have capabilities which are per engine class namespace of bits describing features not present on all engine instances. v2: * Fixed HEVC assignment. * Reorder some fields, rename type to flags, increase width. (Lionel) * No need to allocate temporary storage if we do it engine by engine. (Lionel) v3: * Describe engine flags and mark mbz fields. (Lionel) * HEVC only applies to VCS. v4: * Squash SFC flag into main patch. * Tidy some comments. v5: * Add uabi_ prefix to engine capabilities. (Chris Wilson) * Report exact size of engine info array. (Chris Wilson) * Drop the engine flags. (Joonas Lahtinen) * Added some more reserved fields. * Move flags after class/instance. v6: * Do not check engine info array was zeroed by userspace but zero the unused fields for them instead. v7: * Simplify length calculation loop. (Lionel Landwerlin) v8: * Remove MBZ comments where not applicable. * Rename ABI flags to match engine class define naming. * Rename SFC ABI flag to reflect it applies to VCS and VECS. * SFC is wired to even _logical_ engine instances. * SFC applies to VCS and VECS. * HEVC is present on all instances on Gen11. (Tony) * Simplify length calculation even more. (Chris Wilson) * Move info_ptr assigment closer to loop for clarity. (Chris Wilson) * Use vdbox_sfc_access from runtime info. * Rebase for RUNTIME_INFO. * Refactor for lower indentation. * Rename uAPI class/instance to engine_class/instance to avoid C++ keyword. v9: * Rebase for s/num_rings/num_engines/ in RUNTIME_INFO. v10: * Use new copy_query_item. v11: * Consolidate with struct i915_engine_class_instnace. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Jon Bloomfield <jon.bloomfield@intel.com> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Tony Ye <tony.ye@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> # v7 Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20190522090054.6007-1-tvrtko.ursulin@linux.intel.com
2019-05-22 09:00:54 +00:00
__u32 num_engines;
/** @rsvd: MBZ */
drm/i915: Engine discovery query Engine discovery query allows userspace to enumerate engines, probe their configuration features, all without needing to maintain the internal PCI ID based database. A new query for the generic i915 query ioctl is added named DRM_I915_QUERY_ENGINE_INFO, together with accompanying structure drm_i915_query_engine_info. The address of latter should be passed to the kernel in the query.data_ptr field, and should be large enough for the kernel to fill out all known engines as struct drm_i915_engine_info elements trailing the query. As with other queries, setting the item query length to zero allows userspace to query minimum required buffer size. Enumerated engines have common type mask which can be used to query all hardware engines, versus engines userspace can submit to using the execbuf uAPI. Engines also have capabilities which are per engine class namespace of bits describing features not present on all engine instances. v2: * Fixed HEVC assignment. * Reorder some fields, rename type to flags, increase width. (Lionel) * No need to allocate temporary storage if we do it engine by engine. (Lionel) v3: * Describe engine flags and mark mbz fields. (Lionel) * HEVC only applies to VCS. v4: * Squash SFC flag into main patch. * Tidy some comments. v5: * Add uabi_ prefix to engine capabilities. (Chris Wilson) * Report exact size of engine info array. (Chris Wilson) * Drop the engine flags. (Joonas Lahtinen) * Added some more reserved fields. * Move flags after class/instance. v6: * Do not check engine info array was zeroed by userspace but zero the unused fields for them instead. v7: * Simplify length calculation loop. (Lionel Landwerlin) v8: * Remove MBZ comments where not applicable. * Rename ABI flags to match engine class define naming. * Rename SFC ABI flag to reflect it applies to VCS and VECS. * SFC is wired to even _logical_ engine instances. * SFC applies to VCS and VECS. * HEVC is present on all instances on Gen11. (Tony) * Simplify length calculation even more. (Chris Wilson) * Move info_ptr assigment closer to loop for clarity. (Chris Wilson) * Use vdbox_sfc_access from runtime info. * Rebase for RUNTIME_INFO. * Refactor for lower indentation. * Rename uAPI class/instance to engine_class/instance to avoid C++ keyword. v9: * Rebase for s/num_rings/num_engines/ in RUNTIME_INFO. v10: * Use new copy_query_item. v11: * Consolidate with struct i915_engine_class_instnace. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Jon Bloomfield <jon.bloomfield@intel.com> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Tony Ye <tony.ye@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> # v7 Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20190522090054.6007-1-tvrtko.ursulin@linux.intel.com
2019-05-22 09:00:54 +00:00
__u32 rsvd[3];
/** @engines: Marker for drm_i915_engine_info structures. */
drm/i915: Engine discovery query Engine discovery query allows userspace to enumerate engines, probe their configuration features, all without needing to maintain the internal PCI ID based database. A new query for the generic i915 query ioctl is added named DRM_I915_QUERY_ENGINE_INFO, together with accompanying structure drm_i915_query_engine_info. The address of latter should be passed to the kernel in the query.data_ptr field, and should be large enough for the kernel to fill out all known engines as struct drm_i915_engine_info elements trailing the query. As with other queries, setting the item query length to zero allows userspace to query minimum required buffer size. Enumerated engines have common type mask which can be used to query all hardware engines, versus engines userspace can submit to using the execbuf uAPI. Engines also have capabilities which are per engine class namespace of bits describing features not present on all engine instances. v2: * Fixed HEVC assignment. * Reorder some fields, rename type to flags, increase width. (Lionel) * No need to allocate temporary storage if we do it engine by engine. (Lionel) v3: * Describe engine flags and mark mbz fields. (Lionel) * HEVC only applies to VCS. v4: * Squash SFC flag into main patch. * Tidy some comments. v5: * Add uabi_ prefix to engine capabilities. (Chris Wilson) * Report exact size of engine info array. (Chris Wilson) * Drop the engine flags. (Joonas Lahtinen) * Added some more reserved fields. * Move flags after class/instance. v6: * Do not check engine info array was zeroed by userspace but zero the unused fields for them instead. v7: * Simplify length calculation loop. (Lionel Landwerlin) v8: * Remove MBZ comments where not applicable. * Rename ABI flags to match engine class define naming. * Rename SFC ABI flag to reflect it applies to VCS and VECS. * SFC is wired to even _logical_ engine instances. * SFC applies to VCS and VECS. * HEVC is present on all instances on Gen11. (Tony) * Simplify length calculation even more. (Chris Wilson) * Move info_ptr assigment closer to loop for clarity. (Chris Wilson) * Use vdbox_sfc_access from runtime info. * Rebase for RUNTIME_INFO. * Refactor for lower indentation. * Rename uAPI class/instance to engine_class/instance to avoid C++ keyword. v9: * Rebase for s/num_rings/num_engines/ in RUNTIME_INFO. v10: * Use new copy_query_item. v11: * Consolidate with struct i915_engine_class_instnace. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Jon Bloomfield <jon.bloomfield@intel.com> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Tony Ye <tony.ye@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> # v7 Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20190522090054.6007-1-tvrtko.ursulin@linux.intel.com
2019-05-22 09:00:54 +00:00
struct drm_i915_engine_info engines[];
};
/**
* struct drm_i915_query_perf_config
*
drm/i915/uapi: Add DRM_I915_QUERY_GEOMETRY_SUBSLICES Newer platforms have DSS that aren't necessarily available for both geometry and compute, two queries will need to exist. This introduces the first, when passing a valid engine class and engine instance in the flags returns a topology describing geometry. Based on past discussion, we currently only support this new query item on Xe_HP and beyond; earlier platforms do not need to worry about geometry and compute pipelines having access to different topology and should continue to use the existing topology query. v2: fix white space errors v3: change flags from hosting 2 8 bit numbers to holding a i915_engine_class_instance struct v4: add error if non rcs engine passed. v5 (by MattR): - Improve kerneldoc and cross references to related structs/enums. (Daniel) - Clarify that geometry query is only supported on render engines (Francisco) - Clarify that the new query is only supported on Xe_HP+. - Fix checkpatch warnings. Cc: Ashutosh Dixit <ashutosh.dixit@intel.com> Cc: Matt Roper <matthew.d.roper@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Francisco Jerez <currojerez@riseup.net> UMD (mesa): https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14143 Testcase: igt@i915_query@test-query-geometry-subslices Signed-off-by: Matt Atwood <matthew.s.atwood@intel.com> Signed-off-by: Matt Roper <matthew.d.roper@intel.com> Reviewed-by: Francisco Jerez <currojerez@riseup.net> Link: https://patchwork.freedesktop.org/patch/msgid/20220414192230.749771-4-matthew.d.roper@intel.com
2022-04-14 19:22:30 +00:00
* Data written by the kernel with query %DRM_I915_QUERY_PERF_CONFIG and
* %DRM_I915_QUERY_GEOMETRY_SUBSLICES.
*/
struct drm_i915_query_perf_config {
union {
/**
* @n_configs:
*
* When &drm_i915_query_item.flags ==
* %DRM_I915_QUERY_PERF_CONFIG_LIST, i915 sets this fields to
* the number of configurations available.
*/
__u64 n_configs;
/**
* @config:
*
* When &drm_i915_query_item.flags ==
* %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID, i915 will use the
* value in this field as configuration identifier to decide
* what data to write into config_ptr.
*/
__u64 config;
/**
* @uuid:
*
* When &drm_i915_query_item.flags ==
* %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID, i915 will use the
* value in this field as configuration identifier to decide
* what data to write into config_ptr.
*
* String formatted like "%08x-%04x-%04x-%04x-%012x"
*/
char uuid[36];
};
/**
* @flags:
*
* Unused for now. Must be cleared to zero.
*/
__u32 flags;
/**
* @data:
*
* When &drm_i915_query_item.flags == %DRM_I915_QUERY_PERF_CONFIG_LIST,
* i915 will write an array of __u64 of configuration identifiers.
*
* When &drm_i915_query_item.flags == %DRM_I915_QUERY_PERF_CONFIG_DATA,
* i915 will write a struct drm_i915_perf_oa_config. If the following
* fields of struct drm_i915_perf_oa_config are not set to 0, i915 will
* write into the associated pointers the values of submitted when the
* configuration was created :
*
* - &drm_i915_perf_oa_config.n_mux_regs
* - &drm_i915_perf_oa_config.n_boolean_regs
* - &drm_i915_perf_oa_config.n_flex_regs
*/
__u8 data[];
};
/**
* enum drm_i915_gem_memory_class - Supported memory classes
*/
enum drm_i915_gem_memory_class {
/** @I915_MEMORY_CLASS_SYSTEM: System memory */
I915_MEMORY_CLASS_SYSTEM = 0,
/** @I915_MEMORY_CLASS_DEVICE: Device local-memory */
I915_MEMORY_CLASS_DEVICE,
};
/**
* struct drm_i915_gem_memory_class_instance - Identify particular memory region
*/
struct drm_i915_gem_memory_class_instance {
/** @memory_class: See enum drm_i915_gem_memory_class */
__u16 memory_class;
/** @memory_instance: Which instance */
__u16 memory_instance;
};
/**
* struct drm_i915_memory_region_info - Describes one region as known to the
* driver.
*
* Note this is using both struct drm_i915_query_item and struct drm_i915_query.
* For this new query we are adding the new query id DRM_I915_QUERY_MEMORY_REGIONS
* at &drm_i915_query_item.query_id.
*/
struct drm_i915_memory_region_info {
/** @region: The class:instance pair encoding */
struct drm_i915_gem_memory_class_instance region;
/** @rsvd0: MBZ */
__u32 rsvd0;
/**
* @probed_size: Memory probed by the driver
*
* Note that it should not be possible to ever encounter a zero value
* here, also note that no current region type will ever return -1 here.
* Although for future region types, this might be a possibility. The
* same applies to the other size fields.
*/
__u64 probed_size;
/**
* @unallocated_size: Estimate of memory remaining
*
* Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable accounting.
* Without this (or if this is an older kernel) the value here will
* always equal the @probed_size. Note this is only currently tracked
* for I915_MEMORY_CLASS_DEVICE regions (for other types the value here
* will always equal the @probed_size).
*/
__u64 unallocated_size;
union {
/** @rsvd1: MBZ */
__u64 rsvd1[8];
struct {
/**
* @probed_cpu_visible_size: Memory probed by the driver
* that is CPU accessible.
*
* This will be always be <= @probed_size, and the
* remainder (if there is any) will not be CPU
* accessible.
*
* On systems without small BAR, the @probed_size will
* always equal the @probed_cpu_visible_size, since all
* of it will be CPU accessible.
*
* Note this is only tracked for
* I915_MEMORY_CLASS_DEVICE regions (for other types the
* value here will always equal the @probed_size).
*
* Note that if the value returned here is zero, then
* this must be an old kernel which lacks the relevant
* small-bar uAPI support (including
* I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS), but on
* such systems we should never actually end up with a
* small BAR configuration, assuming we are able to load
* the kernel module. Hence it should be safe to treat
* this the same as when @probed_cpu_visible_size ==
* @probed_size.
*/
__u64 probed_cpu_visible_size;
/**
* @unallocated_cpu_visible_size: Estimate of CPU
* visible memory remaining.
*
* Note this is only tracked for
* I915_MEMORY_CLASS_DEVICE regions (for other types the
* value here will always equal the
* @probed_cpu_visible_size).
*
* Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable
* accounting. Without this the value here will always
* equal the @probed_cpu_visible_size. Note this is only
* currently tracked for I915_MEMORY_CLASS_DEVICE
* regions (for other types the value here will also
* always equal the @probed_cpu_visible_size).
*
* If this is an older kernel the value here will be
* zero, see also @probed_cpu_visible_size.
*/
__u64 unallocated_cpu_visible_size;
};
};
};
/**
* struct drm_i915_query_memory_regions
*
* The region info query enumerates all regions known to the driver by filling
* in an array of struct drm_i915_memory_region_info structures.
*
* Example for getting the list of supported regions:
*
* .. code-block:: C
*
* struct drm_i915_query_memory_regions *info;
* struct drm_i915_query_item item = {
* .query_id = DRM_I915_QUERY_MEMORY_REGIONS;
* };
* struct drm_i915_query query = {
* .num_items = 1,
* .items_ptr = (uintptr_t)&item,
* };
* int err, i;
*
* // First query the size of the blob we need, this needs to be large
* // enough to hold our array of regions. The kernel will fill out the
* // item.length for us, which is the number of bytes we need.
* err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
* if (err) ...
*
* info = calloc(1, item.length);
* // Now that we allocated the required number of bytes, we call the ioctl
* // again, this time with the data_ptr pointing to our newly allocated
* // blob, which the kernel can then populate with the all the region info.
* item.data_ptr = (uintptr_t)&info,
*
* err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
* if (err) ...
*
* // We can now access each region in the array
* for (i = 0; i < info->num_regions; i++) {
* struct drm_i915_memory_region_info mr = info->regions[i];
* u16 class = mr.region.class;
* u16 instance = mr.region.instance;
*
* ....
* }
*
* free(info);
*/
struct drm_i915_query_memory_regions {
/** @num_regions: Number of supported regions */
__u32 num_regions;
/** @rsvd: MBZ */
__u32 rsvd[3];
/** @regions: Info about each supported region */
struct drm_i915_memory_region_info regions[];
};
/**
* struct drm_i915_query_guc_submission_version - query GuC submission interface version
*/
struct drm_i915_query_guc_submission_version {
/** @branch: Firmware branch version. */
__u32 branch;
/** @major: Firmware major version. */
__u32 major;
/** @minor: Firmware minor version. */
__u32 minor;
/** @patch: Firmware patch version. */
__u32 patch;
};
/**
* DOC: GuC HWCONFIG blob uAPI
*
* The GuC produces a blob with information about the current device.
* i915 reads this blob from GuC and makes it available via this uAPI.
*
* The format and meaning of the blob content are documented in the
* Programmer's Reference Manual.
*/
drm/i915/uapi: introduce drm_i915_gem_create_ext Same old gem_create but with now with extensions support. This is needed to support various upcoming usecases. v2:(Chris) - Use separate ioctl number for gem_create_ext, instead of hijacking the existing gem_create ioctl, otherwise we run into the issue with being unable to detect if the kernel supports the new extension behaviour. - We now have gem_create_ext.flags, which should be zeroed. - I915_GEM_CREATE_EXT_SETPARAM value is now zero, since this is the index into our array of extensions. - Setup a "vanilla" object which we can directly apply our extensions to. v3:(Daniel & Jason) - drop I915_GEM_CREATE_EXT_SETPARAM. Instead just have each extension do one thing only, instead of generic setparam which can cover various use cases. - add some kernel-doc. Signed-off-by: Matthew Auld <matthew.auld@intel.com> Signed-off-by: CQ Tang <cq.tang@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> Cc: Jordan Justen <jordan.l.justen@intel.com> Cc: Daniel Vetter <daniel.vetter@intel.com> Cc: Kenneth Graunke <kenneth@whitecape.org> Cc: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@gmail.com> Cc: dri-devel@lists.freedesktop.org Cc: mesa-dev@lists.freedesktop.org Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Link: https://patchwork.freedesktop.org/patch/msgid/20210429103056.407067-5-matthew.auld@intel.com
2021-04-29 10:30:52 +00:00
/**
* struct drm_i915_gem_create_ext - Existing gem_create behaviour, with added
* extension support using struct i915_user_extension.
*
* Note that new buffer flags should be added here, at least for the stuff that
* is immutable. Previously we would have two ioctls, one to create the object
* with gem_create, and another to apply various parameters, however this
* creates some ambiguity for the params which are considered immutable. Also in
* general we're phasing out the various SET/GET ioctls.
drm/i915/uapi: introduce drm_i915_gem_create_ext Same old gem_create but with now with extensions support. This is needed to support various upcoming usecases. v2:(Chris) - Use separate ioctl number for gem_create_ext, instead of hijacking the existing gem_create ioctl, otherwise we run into the issue with being unable to detect if the kernel supports the new extension behaviour. - We now have gem_create_ext.flags, which should be zeroed. - I915_GEM_CREATE_EXT_SETPARAM value is now zero, since this is the index into our array of extensions. - Setup a "vanilla" object which we can directly apply our extensions to. v3:(Daniel & Jason) - drop I915_GEM_CREATE_EXT_SETPARAM. Instead just have each extension do one thing only, instead of generic setparam which can cover various use cases. - add some kernel-doc. Signed-off-by: Matthew Auld <matthew.auld@intel.com> Signed-off-by: CQ Tang <cq.tang@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> Cc: Jordan Justen <jordan.l.justen@intel.com> Cc: Daniel Vetter <daniel.vetter@intel.com> Cc: Kenneth Graunke <kenneth@whitecape.org> Cc: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@gmail.com> Cc: dri-devel@lists.freedesktop.org Cc: mesa-dev@lists.freedesktop.org Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Link: https://patchwork.freedesktop.org/patch/msgid/20210429103056.407067-5-matthew.auld@intel.com
2021-04-29 10:30:52 +00:00
*/
struct drm_i915_gem_create_ext {
/**
* @size: Requested size for the object.
*
* The (page-aligned) allocated size for the object will be returned.
*
drm/i915: enable PS64 support for DG2 It turns out that on production DG2/ATS HW we should have support for PS64. This feature allows to provide a 64K TLB hint at the PTE level, which is a lot more flexible than the current method of enabling 64K GTT pages for the entire page-table, since that leads to all kinds of annoying restrictions, as documented in: commit caa574ffc4aaf4f29b890223878c63e2e7772f62 Author: Matthew Auld <matthew.auld@intel.com> Date: Sat Feb 19 00:17:49 2022 +0530 drm/i915/uapi: document behaviour for DG2 64K support On discrete platforms like DG2, we need to support a minimum page size of 64K when dealing with device local-memory. This is quite tricky for various reasons, so try to document the new implicit uapi for this. With PS64, we can now drop the 2M GTT alignment restriction, and instead only require 64K or larger when dealing with lmem. We still use the compact-pt layout when possible, but only when we are certain that this doesn't interfere with userspace. Note that this is a change in uAPI behaviour, but hopefully shouldn't be a concern (IGT is at least able to autodetect the alignment), since we are only making the GTT alignment constraint less restrictive. Based on a patch from CQ Tang. v2: update the comment wrt scratch page v3: (Nirmoy) - Fix the selftest to actually use the random size, plus some comment improvements, also drop the rem stuff. Reported-by: Michal Mrozek <michal.mrozek@intel.com> Signed-off-by: Matthew Auld <matthew.auld@intel.com> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com> Cc: Stuart Summers <stuart.summers@intel.com> Cc: Jordan Justen <jordan.l.justen@intel.com> Cc: Yang A Shi <yang.a.shi@intel.com> Cc: Nirmoy Das <nirmoy.das@intel.com> Cc: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com> Reviewed-by: Nirmoy Das <nirmoy.das@intel.com> Acked-by: Michal Mrozek <michal.mrozek@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20221004114915.221708-1-matthew.auld@intel.com
2022-10-04 11:49:14 +00:00
* On platforms like DG2/ATS the kernel will always use 64K or larger
* pages for I915_MEMORY_CLASS_DEVICE. The kernel also requires a
* minimum of 64K GTT alignment for such objects.
*
drm/i915: enable PS64 support for DG2 It turns out that on production DG2/ATS HW we should have support for PS64. This feature allows to provide a 64K TLB hint at the PTE level, which is a lot more flexible than the current method of enabling 64K GTT pages for the entire page-table, since that leads to all kinds of annoying restrictions, as documented in: commit caa574ffc4aaf4f29b890223878c63e2e7772f62 Author: Matthew Auld <matthew.auld@intel.com> Date: Sat Feb 19 00:17:49 2022 +0530 drm/i915/uapi: document behaviour for DG2 64K support On discrete platforms like DG2, we need to support a minimum page size of 64K when dealing with device local-memory. This is quite tricky for various reasons, so try to document the new implicit uapi for this. With PS64, we can now drop the 2M GTT alignment restriction, and instead only require 64K or larger when dealing with lmem. We still use the compact-pt layout when possible, but only when we are certain that this doesn't interfere with userspace. Note that this is a change in uAPI behaviour, but hopefully shouldn't be a concern (IGT is at least able to autodetect the alignment), since we are only making the GTT alignment constraint less restrictive. Based on a patch from CQ Tang. v2: update the comment wrt scratch page v3: (Nirmoy) - Fix the selftest to actually use the random size, plus some comment improvements, also drop the rem stuff. Reported-by: Michal Mrozek <michal.mrozek@intel.com> Signed-off-by: Matthew Auld <matthew.auld@intel.com> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com> Cc: Stuart Summers <stuart.summers@intel.com> Cc: Jordan Justen <jordan.l.justen@intel.com> Cc: Yang A Shi <yang.a.shi@intel.com> Cc: Nirmoy Das <nirmoy.das@intel.com> Cc: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com> Reviewed-by: Nirmoy Das <nirmoy.das@intel.com> Acked-by: Michal Mrozek <michal.mrozek@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20221004114915.221708-1-matthew.auld@intel.com
2022-10-04 11:49:14 +00:00
* NOTE: Previously the ABI here required a minimum GTT alignment of 2M
* on DG2/ATS, due to how the hardware implemented 64K GTT page support,
* where we had the following complications:
*
* 1) The entire PDE (which covers a 2MB virtual address range), must
* contain only 64K PTEs, i.e mixing 4K and 64K PTEs in the same
* PDE is forbidden by the hardware.
*
* 2) We still need to support 4K PTEs for I915_MEMORY_CLASS_SYSTEM
* objects.
*
drm/i915: enable PS64 support for DG2 It turns out that on production DG2/ATS HW we should have support for PS64. This feature allows to provide a 64K TLB hint at the PTE level, which is a lot more flexible than the current method of enabling 64K GTT pages for the entire page-table, since that leads to all kinds of annoying restrictions, as documented in: commit caa574ffc4aaf4f29b890223878c63e2e7772f62 Author: Matthew Auld <matthew.auld@intel.com> Date: Sat Feb 19 00:17:49 2022 +0530 drm/i915/uapi: document behaviour for DG2 64K support On discrete platforms like DG2, we need to support a minimum page size of 64K when dealing with device local-memory. This is quite tricky for various reasons, so try to document the new implicit uapi for this. With PS64, we can now drop the 2M GTT alignment restriction, and instead only require 64K or larger when dealing with lmem. We still use the compact-pt layout when possible, but only when we are certain that this doesn't interfere with userspace. Note that this is a change in uAPI behaviour, but hopefully shouldn't be a concern (IGT is at least able to autodetect the alignment), since we are only making the GTT alignment constraint less restrictive. Based on a patch from CQ Tang. v2: update the comment wrt scratch page v3: (Nirmoy) - Fix the selftest to actually use the random size, plus some comment improvements, also drop the rem stuff. Reported-by: Michal Mrozek <michal.mrozek@intel.com> Signed-off-by: Matthew Auld <matthew.auld@intel.com> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com> Cc: Stuart Summers <stuart.summers@intel.com> Cc: Jordan Justen <jordan.l.justen@intel.com> Cc: Yang A Shi <yang.a.shi@intel.com> Cc: Nirmoy Das <nirmoy.das@intel.com> Cc: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com> Reviewed-by: Nirmoy Das <nirmoy.das@intel.com> Acked-by: Michal Mrozek <michal.mrozek@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20221004114915.221708-1-matthew.auld@intel.com
2022-10-04 11:49:14 +00:00
* However on actual production HW this was completely changed to now
* allow setting a TLB hint at the PTE level (see PS64), which is a lot
* more flexible than the above. With this the 2M restriction was
* dropped where we now only require 64K.
drm/i915/uapi: introduce drm_i915_gem_create_ext Same old gem_create but with now with extensions support. This is needed to support various upcoming usecases. v2:(Chris) - Use separate ioctl number for gem_create_ext, instead of hijacking the existing gem_create ioctl, otherwise we run into the issue with being unable to detect if the kernel supports the new extension behaviour. - We now have gem_create_ext.flags, which should be zeroed. - I915_GEM_CREATE_EXT_SETPARAM value is now zero, since this is the index into our array of extensions. - Setup a "vanilla" object which we can directly apply our extensions to. v3:(Daniel & Jason) - drop I915_GEM_CREATE_EXT_SETPARAM. Instead just have each extension do one thing only, instead of generic setparam which can cover various use cases. - add some kernel-doc. Signed-off-by: Matthew Auld <matthew.auld@intel.com> Signed-off-by: CQ Tang <cq.tang@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> Cc: Jordan Justen <jordan.l.justen@intel.com> Cc: Daniel Vetter <daniel.vetter@intel.com> Cc: Kenneth Graunke <kenneth@whitecape.org> Cc: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@gmail.com> Cc: dri-devel@lists.freedesktop.org Cc: mesa-dev@lists.freedesktop.org Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Link: https://patchwork.freedesktop.org/patch/msgid/20210429103056.407067-5-matthew.auld@intel.com
2021-04-29 10:30:52 +00:00
*/
__u64 size;
drm/i915/uapi: introduce drm_i915_gem_create_ext Same old gem_create but with now with extensions support. This is needed to support various upcoming usecases. v2:(Chris) - Use separate ioctl number for gem_create_ext, instead of hijacking the existing gem_create ioctl, otherwise we run into the issue with being unable to detect if the kernel supports the new extension behaviour. - We now have gem_create_ext.flags, which should be zeroed. - I915_GEM_CREATE_EXT_SETPARAM value is now zero, since this is the index into our array of extensions. - Setup a "vanilla" object which we can directly apply our extensions to. v3:(Daniel & Jason) - drop I915_GEM_CREATE_EXT_SETPARAM. Instead just have each extension do one thing only, instead of generic setparam which can cover various use cases. - add some kernel-doc. Signed-off-by: Matthew Auld <matthew.auld@intel.com> Signed-off-by: CQ Tang <cq.tang@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> Cc: Jordan Justen <jordan.l.justen@intel.com> Cc: Daniel Vetter <daniel.vetter@intel.com> Cc: Kenneth Graunke <kenneth@whitecape.org> Cc: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@gmail.com> Cc: dri-devel@lists.freedesktop.org Cc: mesa-dev@lists.freedesktop.org Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Link: https://patchwork.freedesktop.org/patch/msgid/20210429103056.407067-5-matthew.auld@intel.com
2021-04-29 10:30:52 +00:00
/**
* @handle: Returned handle for the object.
*
* Object handles are nonzero.
*/
__u32 handle;
/**
* @flags: Optional flags.
*
* Supported values:
*
* I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS - Signal to the kernel that
* the object will need to be accessed via the CPU.
*
* Only valid when placing objects in I915_MEMORY_CLASS_DEVICE, and only
* strictly required on configurations where some subset of the device
* memory is directly visible/mappable through the CPU (which we also
* call small BAR), like on some DG2+ systems. Note that this is quite
* undesirable, but due to various factors like the client CPU, BIOS etc
* it's something we can expect to see in the wild. See
* &drm_i915_memory_region_info.probed_cpu_visible_size for how to
* determine if this system applies.
*
* Note that one of the placements MUST be I915_MEMORY_CLASS_SYSTEM, to
* ensure the kernel can always spill the allocation to system memory,
* if the object can't be allocated in the mappable part of
* I915_MEMORY_CLASS_DEVICE.
*
* Also note that since the kernel only supports flat-CCS on objects
* that can *only* be placed in I915_MEMORY_CLASS_DEVICE, we therefore
* don't support I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS together with
* flat-CCS.
*
* Without this hint, the kernel will assume that non-mappable
* I915_MEMORY_CLASS_DEVICE is preferred for this object. Note that the
* kernel can still migrate the object to the mappable part, as a last
* resort, if userspace ever CPU faults this object, but this might be
* expensive, and so ideally should be avoided.
*
* On older kernels which lack the relevant small-bar uAPI support (see
* also &drm_i915_memory_region_info.probed_cpu_visible_size),
* usage of the flag will result in an error, but it should NEVER be
* possible to end up with a small BAR configuration, assuming we can
* also successfully load the i915 kernel module. In such cases the
* entire I915_MEMORY_CLASS_DEVICE region will be CPU accessible, and as
* such there are zero restrictions on where the object can be placed.
*/
#define I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS (1 << 0)
drm/i915/uapi: introduce drm_i915_gem_create_ext Same old gem_create but with now with extensions support. This is needed to support various upcoming usecases. v2:(Chris) - Use separate ioctl number for gem_create_ext, instead of hijacking the existing gem_create ioctl, otherwise we run into the issue with being unable to detect if the kernel supports the new extension behaviour. - We now have gem_create_ext.flags, which should be zeroed. - I915_GEM_CREATE_EXT_SETPARAM value is now zero, since this is the index into our array of extensions. - Setup a "vanilla" object which we can directly apply our extensions to. v3:(Daniel & Jason) - drop I915_GEM_CREATE_EXT_SETPARAM. Instead just have each extension do one thing only, instead of generic setparam which can cover various use cases. - add some kernel-doc. Signed-off-by: Matthew Auld <matthew.auld@intel.com> Signed-off-by: CQ Tang <cq.tang@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> Cc: Jordan Justen <jordan.l.justen@intel.com> Cc: Daniel Vetter <daniel.vetter@intel.com> Cc: Kenneth Graunke <kenneth@whitecape.org> Cc: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@gmail.com> Cc: dri-devel@lists.freedesktop.org Cc: mesa-dev@lists.freedesktop.org Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Link: https://patchwork.freedesktop.org/patch/msgid/20210429103056.407067-5-matthew.auld@intel.com
2021-04-29 10:30:52 +00:00
__u32 flags;
drm/i915/uapi: introduce drm_i915_gem_create_ext Same old gem_create but with now with extensions support. This is needed to support various upcoming usecases. v2:(Chris) - Use separate ioctl number for gem_create_ext, instead of hijacking the existing gem_create ioctl, otherwise we run into the issue with being unable to detect if the kernel supports the new extension behaviour. - We now have gem_create_ext.flags, which should be zeroed. - I915_GEM_CREATE_EXT_SETPARAM value is now zero, since this is the index into our array of extensions. - Setup a "vanilla" object which we can directly apply our extensions to. v3:(Daniel & Jason) - drop I915_GEM_CREATE_EXT_SETPARAM. Instead just have each extension do one thing only, instead of generic setparam which can cover various use cases. - add some kernel-doc. Signed-off-by: Matthew Auld <matthew.auld@intel.com> Signed-off-by: CQ Tang <cq.tang@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> Cc: Jordan Justen <jordan.l.justen@intel.com> Cc: Daniel Vetter <daniel.vetter@intel.com> Cc: Kenneth Graunke <kenneth@whitecape.org> Cc: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@gmail.com> Cc: dri-devel@lists.freedesktop.org Cc: mesa-dev@lists.freedesktop.org Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Link: https://patchwork.freedesktop.org/patch/msgid/20210429103056.407067-5-matthew.auld@intel.com
2021-04-29 10:30:52 +00:00
/**
* @extensions: The chain of extensions to apply to this object.
*
* This will be useful in the future when we need to support several
* different extensions, and we need to apply more than one when
* creating the object. See struct i915_user_extension.
*
* If we don't supply any extensions then we get the same old gem_create
* behaviour.
*
* For I915_GEM_CREATE_EXT_MEMORY_REGIONS usage see
* struct drm_i915_gem_create_ext_memory_regions.
drm/i915/pxp: interfaces for using protected objects This api allow user mode to create protected buffers and to mark contexts as making use of such objects. Only when using contexts marked in such a way is the execution guaranteed to work as expected. Contexts can only be marked as using protected content at creation time (i.e. the parameter is immutable) and they must be both bannable and not recoverable. Given that the protected session gets invalidated on suspend, contexts created this way hold a runtime pm wakeref until they're either destroyed or invalidated. All protected objects and contexts will be considered invalid when the PXP session is destroyed and all new submissions using them will be rejected. All intel contexts within the invalidated gem contexts will be marked banned. Userspace can detect that an invalidation has occurred via the RESET_STATS ioctl, where we report it the same way as a ban due to a hang. v5: squash patches, rebase on proto_ctx, update kerneldoc v6: rebase on obj create_ext changes v7: Use session counter to check if an object it valid, hold wakeref in context, don't add a new flag to RESET_STATS (Daniel) v8: don't increase guilty count for contexts banned during pxp invalidation (Rodrigo) v9: better comments, avoid wakeref put race between pxp_inval and context_close, add usage examples (Rodrigo) v10: modify internal set/get-protected-context functions to not return -ENODEV when setting PXP param to false or getting param when running on pxp-unsupported hw or getting param when i915 was built with CONFIG_PXP off Signed-off-by: Alan Previn <alan.previn.teres.alexis@intel.com> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Signed-off-by: Bommu Krishnaiah <krishnaiah.bommu@intel.com> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: Jason Ekstrand <jason@jlekstrand.net> Cc: Daniel Vetter <daniel.vetter@intel.com> Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20210924191452.1539378-11-alan.previn.teres.alexis@intel.com
2021-09-24 19:14:45 +00:00
*
* For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
* struct drm_i915_gem_create_ext_protected_content.
drm/i915: Allow user to set cache at BO creation To comply with the design that buffer objects shall have immutable cache setting through out their life cycle, {set, get}_caching ioctl's are no longer supported from MTL onward. With that change caching policy can only be set at object creation time. The current code applies a default (platform dependent) cache setting for all objects. However this is not optimal for performance tuning. The patch extends the existing gem_create uAPI to let user set PAT index for the object at creation time. The new extension is platform independent, so UMD's can switch to using this extension for older platforms as well, while {set, get}_caching are still supported on these legacy paltforms for compatibility reason. However, since PAT index was not clearly defined for platforms prior to GEN12 (TGL), so we are limiting this externsion to GEN12+ platforms only. See ext_set_pat() in for the implementation details. The documentation related to the PAT/MOCS tables is currently available for Tiger Lake here: https://www.intel.com/content/www/us/en/docs/graphics-for-linux/developer-reference/1-0/tiger-lake.html The documentation for other platforms is currently being updated. BSpec: 45101 Mesa support has been submitted in this merge request: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22878 The media driver supprt has bin submitted in this merge request: https://github.com/intel/media-driver/pull/1680 The IGT test related to this change is igt@gem_create@create-ext-set-pat Signed-off-by: Fei Yang <fei.yang@intel.com> Cc: Chris Wilson <chris.p.wilson@linux.intel.com> Cc: Matt Roper <matthew.d.roper@intel.com> Cc: Andi Shyti <andi.shyti@linux.intel.com> Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com> Acked-by: Jordan Justen <jordan.l.justen@intel.com> Tested-by: Jordan Justen <jordan.l.justen@intel.com> Acked-by: Carl Zhang <carl.zhang@intel.com> Tested-by: Lihao Gu <lihao.gu@intel.com> Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com> Acked-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Acked-by: Slawomir Milczarek <slawomir.milczarek@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20230606100042.482345-2-andi.shyti@linux.intel.com
2023-06-06 10:00:42 +00:00
*
* For I915_GEM_CREATE_EXT_SET_PAT usage see
* struct drm_i915_gem_create_ext_set_pat.
drm/i915/uapi: introduce drm_i915_gem_create_ext Same old gem_create but with now with extensions support. This is needed to support various upcoming usecases. v2:(Chris) - Use separate ioctl number for gem_create_ext, instead of hijacking the existing gem_create ioctl, otherwise we run into the issue with being unable to detect if the kernel supports the new extension behaviour. - We now have gem_create_ext.flags, which should be zeroed. - I915_GEM_CREATE_EXT_SETPARAM value is now zero, since this is the index into our array of extensions. - Setup a "vanilla" object which we can directly apply our extensions to. v3:(Daniel & Jason) - drop I915_GEM_CREATE_EXT_SETPARAM. Instead just have each extension do one thing only, instead of generic setparam which can cover various use cases. - add some kernel-doc. Signed-off-by: Matthew Auld <matthew.auld@intel.com> Signed-off-by: CQ Tang <cq.tang@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> Cc: Jordan Justen <jordan.l.justen@intel.com> Cc: Daniel Vetter <daniel.vetter@intel.com> Cc: Kenneth Graunke <kenneth@whitecape.org> Cc: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@gmail.com> Cc: dri-devel@lists.freedesktop.org Cc: mesa-dev@lists.freedesktop.org Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Link: https://patchwork.freedesktop.org/patch/msgid/20210429103056.407067-5-matthew.auld@intel.com
2021-04-29 10:30:52 +00:00
*/
#define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
drm/i915/pxp: interfaces for using protected objects This api allow user mode to create protected buffers and to mark contexts as making use of such objects. Only when using contexts marked in such a way is the execution guaranteed to work as expected. Contexts can only be marked as using protected content at creation time (i.e. the parameter is immutable) and they must be both bannable and not recoverable. Given that the protected session gets invalidated on suspend, contexts created this way hold a runtime pm wakeref until they're either destroyed or invalidated. All protected objects and contexts will be considered invalid when the PXP session is destroyed and all new submissions using them will be rejected. All intel contexts within the invalidated gem contexts will be marked banned. Userspace can detect that an invalidation has occurred via the RESET_STATS ioctl, where we report it the same way as a ban due to a hang. v5: squash patches, rebase on proto_ctx, update kerneldoc v6: rebase on obj create_ext changes v7: Use session counter to check if an object it valid, hold wakeref in context, don't add a new flag to RESET_STATS (Daniel) v8: don't increase guilty count for contexts banned during pxp invalidation (Rodrigo) v9: better comments, avoid wakeref put race between pxp_inval and context_close, add usage examples (Rodrigo) v10: modify internal set/get-protected-context functions to not return -ENODEV when setting PXP param to false or getting param when running on pxp-unsupported hw or getting param when i915 was built with CONFIG_PXP off Signed-off-by: Alan Previn <alan.previn.teres.alexis@intel.com> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Signed-off-by: Bommu Krishnaiah <krishnaiah.bommu@intel.com> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: Jason Ekstrand <jason@jlekstrand.net> Cc: Daniel Vetter <daniel.vetter@intel.com> Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20210924191452.1539378-11-alan.previn.teres.alexis@intel.com
2021-09-24 19:14:45 +00:00
#define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1
drm/i915: Allow user to set cache at BO creation To comply with the design that buffer objects shall have immutable cache setting through out their life cycle, {set, get}_caching ioctl's are no longer supported from MTL onward. With that change caching policy can only be set at object creation time. The current code applies a default (platform dependent) cache setting for all objects. However this is not optimal for performance tuning. The patch extends the existing gem_create uAPI to let user set PAT index for the object at creation time. The new extension is platform independent, so UMD's can switch to using this extension for older platforms as well, while {set, get}_caching are still supported on these legacy paltforms for compatibility reason. However, since PAT index was not clearly defined for platforms prior to GEN12 (TGL), so we are limiting this externsion to GEN12+ platforms only. See ext_set_pat() in for the implementation details. The documentation related to the PAT/MOCS tables is currently available for Tiger Lake here: https://www.intel.com/content/www/us/en/docs/graphics-for-linux/developer-reference/1-0/tiger-lake.html The documentation for other platforms is currently being updated. BSpec: 45101 Mesa support has been submitted in this merge request: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22878 The media driver supprt has bin submitted in this merge request: https://github.com/intel/media-driver/pull/1680 The IGT test related to this change is igt@gem_create@create-ext-set-pat Signed-off-by: Fei Yang <fei.yang@intel.com> Cc: Chris Wilson <chris.p.wilson@linux.intel.com> Cc: Matt Roper <matthew.d.roper@intel.com> Cc: Andi Shyti <andi.shyti@linux.intel.com> Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com> Acked-by: Jordan Justen <jordan.l.justen@intel.com> Tested-by: Jordan Justen <jordan.l.justen@intel.com> Acked-by: Carl Zhang <carl.zhang@intel.com> Tested-by: Lihao Gu <lihao.gu@intel.com> Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com> Acked-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Acked-by: Slawomir Milczarek <slawomir.milczarek@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20230606100042.482345-2-andi.shyti@linux.intel.com
2023-06-06 10:00:42 +00:00
#define I915_GEM_CREATE_EXT_SET_PAT 2
drm/i915/uapi: introduce drm_i915_gem_create_ext Same old gem_create but with now with extensions support. This is needed to support various upcoming usecases. v2:(Chris) - Use separate ioctl number for gem_create_ext, instead of hijacking the existing gem_create ioctl, otherwise we run into the issue with being unable to detect if the kernel supports the new extension behaviour. - We now have gem_create_ext.flags, which should be zeroed. - I915_GEM_CREATE_EXT_SETPARAM value is now zero, since this is the index into our array of extensions. - Setup a "vanilla" object which we can directly apply our extensions to. v3:(Daniel & Jason) - drop I915_GEM_CREATE_EXT_SETPARAM. Instead just have each extension do one thing only, instead of generic setparam which can cover various use cases. - add some kernel-doc. Signed-off-by: Matthew Auld <matthew.auld@intel.com> Signed-off-by: CQ Tang <cq.tang@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> Cc: Jordan Justen <jordan.l.justen@intel.com> Cc: Daniel Vetter <daniel.vetter@intel.com> Cc: Kenneth Graunke <kenneth@whitecape.org> Cc: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@gmail.com> Cc: dri-devel@lists.freedesktop.org Cc: mesa-dev@lists.freedesktop.org Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Link: https://patchwork.freedesktop.org/patch/msgid/20210429103056.407067-5-matthew.auld@intel.com
2021-04-29 10:30:52 +00:00
__u64 extensions;
};
/**
* struct drm_i915_gem_create_ext_memory_regions - The
* I915_GEM_CREATE_EXT_MEMORY_REGIONS extension.
*
* Set the object with the desired set of placements/regions in priority
* order. Each entry must be unique and supported by the device.
*
* This is provided as an array of struct drm_i915_gem_memory_class_instance, or
* an equivalent layout of class:instance pair encodings. See struct
* drm_i915_query_memory_regions and DRM_I915_QUERY_MEMORY_REGIONS for how to
* query the supported regions for a device.
*
* As an example, on discrete devices, if we wish to set the placement as
* device local-memory we can do something like:
*
* .. code-block:: C
*
* struct drm_i915_gem_memory_class_instance region_lmem = {
* .memory_class = I915_MEMORY_CLASS_DEVICE,
* .memory_instance = 0,
* };
* struct drm_i915_gem_create_ext_memory_regions regions = {
* .base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS },
* .regions = (uintptr_t)&region_lmem,
* .num_regions = 1,
* };
* struct drm_i915_gem_create_ext create_ext = {
* .size = 16 * PAGE_SIZE,
* .extensions = (uintptr_t)&regions,
* };
*
* int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
* if (err) ...
*
* At which point we get the object handle in &drm_i915_gem_create_ext.handle,
* along with the final object size in &drm_i915_gem_create_ext.size, which
* should account for any rounding up, if required.
*
* Note that userspace has no means of knowing the current backing region
* for objects where @num_regions is larger than one. The kernel will only
* ensure that the priority order of the @regions array is honoured, either
* when initially placing the object, or when moving memory around due to
* memory pressure
*
* On Flat-CCS capable HW, compression is supported for the objects residing
* in I915_MEMORY_CLASS_DEVICE. When such objects (compressed) have other
* memory class in @regions and migrated (by i915, due to memory
* constraints) to the non I915_MEMORY_CLASS_DEVICE region, then i915 needs to
* decompress the content. But i915 doesn't have the required information to
* decompress the userspace compressed objects.
*
* So i915 supports Flat-CCS, on the objects which can reside only on
* I915_MEMORY_CLASS_DEVICE regions.
*/
struct drm_i915_gem_create_ext_memory_regions {
/** @base: Extension link. See struct i915_user_extension. */
struct i915_user_extension base;
/** @pad: MBZ */
__u32 pad;
/** @num_regions: Number of elements in the @regions array. */
__u32 num_regions;
/**
* @regions: The regions/placements array.
*
* An array of struct drm_i915_gem_memory_class_instance.
*/
__u64 regions;
};
drm/i915/pxp: interfaces for using protected objects This api allow user mode to create protected buffers and to mark contexts as making use of such objects. Only when using contexts marked in such a way is the execution guaranteed to work as expected. Contexts can only be marked as using protected content at creation time (i.e. the parameter is immutable) and they must be both bannable and not recoverable. Given that the protected session gets invalidated on suspend, contexts created this way hold a runtime pm wakeref until they're either destroyed or invalidated. All protected objects and contexts will be considered invalid when the PXP session is destroyed and all new submissions using them will be rejected. All intel contexts within the invalidated gem contexts will be marked banned. Userspace can detect that an invalidation has occurred via the RESET_STATS ioctl, where we report it the same way as a ban due to a hang. v5: squash patches, rebase on proto_ctx, update kerneldoc v6: rebase on obj create_ext changes v7: Use session counter to check if an object it valid, hold wakeref in context, don't add a new flag to RESET_STATS (Daniel) v8: don't increase guilty count for contexts banned during pxp invalidation (Rodrigo) v9: better comments, avoid wakeref put race between pxp_inval and context_close, add usage examples (Rodrigo) v10: modify internal set/get-protected-context functions to not return -ENODEV when setting PXP param to false or getting param when running on pxp-unsupported hw or getting param when i915 was built with CONFIG_PXP off Signed-off-by: Alan Previn <alan.previn.teres.alexis@intel.com> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Signed-off-by: Bommu Krishnaiah <krishnaiah.bommu@intel.com> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: Jason Ekstrand <jason@jlekstrand.net> Cc: Daniel Vetter <daniel.vetter@intel.com> Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20210924191452.1539378-11-alan.previn.teres.alexis@intel.com
2021-09-24 19:14:45 +00:00
/**
* struct drm_i915_gem_create_ext_protected_content - The
* I915_OBJECT_PARAM_PROTECTED_CONTENT extension.
*
* If this extension is provided, buffer contents are expected to be protected
* by PXP encryption and require decryption for scan out and processing. This
* is only possible on platforms that have PXP enabled, on all other scenarios
* using this extension will cause the ioctl to fail and return -ENODEV. The
* flags parameter is reserved for future expansion and must currently be set
* to zero.
*
* The buffer contents are considered invalid after a PXP session teardown.
*
* The encryption is guaranteed to be processed correctly only if the object
* is submitted with a context created using the
* I915_CONTEXT_PARAM_PROTECTED_CONTENT flag. This will also enable extra checks
* at submission time on the validity of the objects involved.
*
* Below is an example on how to create a protected object:
*
* .. code-block:: C
*
* struct drm_i915_gem_create_ext_protected_content protected_ext = {
* .base = { .name = I915_GEM_CREATE_EXT_PROTECTED_CONTENT },
* .flags = 0,
* };
* struct drm_i915_gem_create_ext create_ext = {
* .size = PAGE_SIZE,
* .extensions = (uintptr_t)&protected_ext,
* };
*
* int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
* if (err) ...
*/
struct drm_i915_gem_create_ext_protected_content {
/** @base: Extension link. See struct i915_user_extension. */
struct i915_user_extension base;
/** @flags: reserved for future usage, currently MBZ */
__u32 flags;
};
drm/i915: Allow user to set cache at BO creation To comply with the design that buffer objects shall have immutable cache setting through out their life cycle, {set, get}_caching ioctl's are no longer supported from MTL onward. With that change caching policy can only be set at object creation time. The current code applies a default (platform dependent) cache setting for all objects. However this is not optimal for performance tuning. The patch extends the existing gem_create uAPI to let user set PAT index for the object at creation time. The new extension is platform independent, so UMD's can switch to using this extension for older platforms as well, while {set, get}_caching are still supported on these legacy paltforms for compatibility reason. However, since PAT index was not clearly defined for platforms prior to GEN12 (TGL), so we are limiting this externsion to GEN12+ platforms only. See ext_set_pat() in for the implementation details. The documentation related to the PAT/MOCS tables is currently available for Tiger Lake here: https://www.intel.com/content/www/us/en/docs/graphics-for-linux/developer-reference/1-0/tiger-lake.html The documentation for other platforms is currently being updated. BSpec: 45101 Mesa support has been submitted in this merge request: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22878 The media driver supprt has bin submitted in this merge request: https://github.com/intel/media-driver/pull/1680 The IGT test related to this change is igt@gem_create@create-ext-set-pat Signed-off-by: Fei Yang <fei.yang@intel.com> Cc: Chris Wilson <chris.p.wilson@linux.intel.com> Cc: Matt Roper <matthew.d.roper@intel.com> Cc: Andi Shyti <andi.shyti@linux.intel.com> Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com> Acked-by: Jordan Justen <jordan.l.justen@intel.com> Tested-by: Jordan Justen <jordan.l.justen@intel.com> Acked-by: Carl Zhang <carl.zhang@intel.com> Tested-by: Lihao Gu <lihao.gu@intel.com> Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com> Acked-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Acked-by: Slawomir Milczarek <slawomir.milczarek@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20230606100042.482345-2-andi.shyti@linux.intel.com
2023-06-06 10:00:42 +00:00
/**
* struct drm_i915_gem_create_ext_set_pat - The
* I915_GEM_CREATE_EXT_SET_PAT extension.
*
* If this extension is provided, the specified caching policy (PAT index) is
* applied to the buffer object.
*
* Below is an example on how to create an object with specific caching policy:
*
* .. code-block:: C
*
* struct drm_i915_gem_create_ext_set_pat set_pat_ext = {
* .base = { .name = I915_GEM_CREATE_EXT_SET_PAT },
* .pat_index = 0,
* };
* struct drm_i915_gem_create_ext create_ext = {
* .size = PAGE_SIZE,
* .extensions = (uintptr_t)&set_pat_ext,
* };
*
* int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
* if (err) ...
*/
struct drm_i915_gem_create_ext_set_pat {
/** @base: Extension link. See struct i915_user_extension. */
struct i915_user_extension base;
/**
* @pat_index: PAT index to be set
* PAT index is a bit field in Page Table Entry to control caching
* behaviors for GPU accesses. The definition of PAT index is
* platform dependent and can be found in hardware specifications,
*/
__u32 pat_index;
/** @rsvd: reserved for future use */
__u32 rsvd;
};
/* ID of the protected content session managed by i915 when PXP is active */
#define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf
#if defined(__cplusplus)
}
#endif
#endif /* _UAPI_I915_DRM_H_ */