Merge tag 'drm-intel-gt-next-2024-04-26' of https://anongit.freedesktop.org/git/drm/drm-intel into drm-next

UAPI Changes:

- drm/i915/guc: Use context hints for GT frequency

    Allow the user to provide a low latency context hint. When set, the KMD
    sends a hint to the GuC, which results in special handling for this
    context: SLPC will ramp the GT frequency aggressively every time it
    switches to this context, and the down frequency threshold will also be
    lower, so the GuC will ramp down the GT frequency for this context more
    slowly. We also disable waitboost for this context as it would interfere
    with this strategy.

    We need to enable the use of the SLPC Compute strategy during init, but
    it will apply only to contexts that set this bit during context
    creation.

    Userland can check whether this feature is supported using a new param,
    I915_PARAM_HAS_CONTEXT_FREQ_HINT, as sketched below. This flag is true
    for all GuC submission enabled platforms, as they use SLPC for frequency
    management.

    The Mesa usage model for this flag is here -
    https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint
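
    As a rough illustration only (not part of this series; the helper names
    are made up, error handling is trimmed, and the updated i915_drm.h uapi
    header is assumed), userspace could probe the new getparam and then
    create a context carrying the low latency hint roughly like this:

      #include <stdint.h>
      #include <sys/ioctl.h>
      #include <drm/i915_drm.h>

      /* Returns non-zero if the kernel advertises the context freq hint. */
      static int has_context_freq_hint(int fd)
      {
          int value = 0;
          struct drm_i915_getparam gp = {
              .param = I915_PARAM_HAS_CONTEXT_FREQ_HINT,
              .value = &value,
          };

          if (ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
              return 0;
          return value == 1;
      }

      /* Create a GEM context with I915_CONTEXT_PARAM_LOW_LATENCY set.
       * The kernel rejects the hint with -EINVAL when GuC submission is
       * not in use, hence the getparam check above.
       */
      static uint32_t create_low_latency_context(int fd)
      {
          struct drm_i915_gem_context_create_ext_setparam p = {
              .base.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
              .param = {
                  .param = I915_CONTEXT_PARAM_LOW_LATENCY,
                  .value = 1,
              },
          };
          struct drm_i915_gem_context_create_ext create = {
              .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
              .extensions = (uintptr_t)&p,
          };

          if (ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &create))
              return 0;
          return create.ctx_id;
      }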

- drm/i915/gt: Enable only one CCS for compute workload

    Enable only one CCS engine by default with all the compute slices
    allocated to it.

    While generating the list of UABI engines to be exposed to the
    user, exclude any additional CCS engines beyond the first
    instance.

    ***

    NOTE: This W/A will make all DG2 SKUs appear as single CCS SKUs by
    default to mitigate a hardware bug. All the EUs will still remain
    usable, and all the userspace drivers have been confirmed to be able
    to dynamically detect the change in the number of CCS engines and
    adjust (see the query sketch at the end of this note).

    For the smaller percentage of applications that get a perf benefit from
    letting the userspace driver dispatch across all 4 CCS engines, we will
    be introducing a sysfs control in a later patch to choose 4 CCS engines,
    each with 25% of the EUs (or 50% if there are 2 CCS engines).

    NOTE: A regression has been reported at

    https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/10895

    However, Andi has been triaging the issue and we're closing in on a fix
    for the gap in the W/A implementation:

    https://lists.freedesktop.org/archives/intel-gfx/2024-April/348747.html
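
    For reference on how userspace can observe this change: the sketch below
    (not part of this series; the helper name is made up and error handling
    is trimmed) counts the CCS engines exposed through the existing
    DRM_I915_QUERY_ENGINE_INFO query. With this W/A in place it would report
    1 on DG2 by default:

      #include <stdint.h>
      #include <stdlib.h>
      #include <sys/ioctl.h>
      #include <drm/i915_drm.h>

      /* Count the compute (CCS) engines currently exposed via the UABI. */
      static int count_ccs_engines(int fd)
      {
          struct drm_i915_query_item item = {
              .query_id = DRM_I915_QUERY_ENGINE_INFO,
          };
          struct drm_i915_query query = {
              .num_items = 1,
              .items_ptr = (uintptr_t)&item,
          };
          struct drm_i915_query_engine_info *info;
          unsigned int i, ccs = 0;

          /* First call only fills item.length with the required size. */
          if (ioctl(fd, DRM_IOCTL_I915_QUERY, &query) || item.length <= 0)
              return -1;

          info = calloc(1, item.length);
          if (!info)
              return -1;
          item.data_ptr = (uintptr_t)info;

          if (ioctl(fd, DRM_IOCTL_I915_QUERY, &query)) {
              free(info);
              return -1;
          }

          for (i = 0; i < info->num_engines; i++)
              if (info->engines[i].engine.engine_class ==
                  I915_ENGINE_CLASS_COMPUTE)
                  ccs++;

          free(info);
          return ccs;
      }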

Driver Changes:

- Add new workarounds and fix existing ones: Wa_14018575942 (MTL),
  Wa_16019325821 (Gen12.70), Wa_14019159160 (MTL), Wa_16015675438,
  Wa_14020495402 (Gen12.70) (Tejas, John, Lucas)
- Fix a UAF in the destroy vs. retire race and remove two earlier
  partial fixes (Janusz)
- Limit the reserved VM space to only the platforms that need it (Andi)
- Reset queue_priority_hint on parking for execlist platforms (Chris)
- Fix GT reset when GuC submission is disabled (Nirmoy)
- Correct capture of EIR register on hang (John)

- Remove usage of the deprecated ida_simple_xx() API
- Refactor confusing __intel_gt_reset() (Nirmoy)
- Fix the fix for GuC reset lock confusion (John)
- Simplify/extend platform check for Wa_14018913170 (John)
- Replace dev_priv with i915 (Andi)
- Add and use gt_to_guc() wrapper (Andi)
- Remove bogus null check (Rodrigo, Dan)

- Selftest improvements (Janusz, Nirmoy, Daniele)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/ZitVBTvZmityDi7D@jlahtine-mobl.ger.corp.intel.com
Dave Airlie 2024-04-30 14:20:31 +10:00
commit 68b89e23c2
54 changed files with 414 additions and 156 deletions


@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv,
struct i915_gem_proto_context *pc,
struct drm_i915_gem_context_param *args)
{
struct drm_i915_private *i915 = fpriv->i915;
int ret = 0;
switch (args->param) {
@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv,
pc->user_flags &= ~BIT(UCONTEXT_BANNABLE);
break;
case I915_CONTEXT_PARAM_LOW_LATENCY:
if (intel_uc_uses_guc_submission(&to_gt(i915)->uc))
pc->user_flags |= BIT(UCONTEXT_LOW_LATENCY);
else
ret = -EINVAL;
break;
case I915_CONTEXT_PARAM_RECOVERABLE:
if (args->size)
ret = -EINVAL;
@ -992,6 +1000,9 @@ static int intel_context_set_gem(struct intel_context *ce,
if (sseu.slice_mask && !WARN_ON(ce->engine->class != RENDER_CLASS))
ret = intel_context_reconfigure_sseu(ce, sseu);
if (test_bit(UCONTEXT_LOW_LATENCY, &ctx->user_flags))
__set_bit(CONTEXT_LOW_LATENCY, &ce->flags);
return ret;
}
@ -1630,6 +1641,9 @@ i915_gem_create_context(struct drm_i915_private *i915,
if (vm)
ctx->vm = vm;
/* Assign early so intel_context_set_gem can access these flags */
ctx->user_flags = pc->user_flags;
mutex_init(&ctx->engines_mutex);
if (pc->num_user_engines >= 0) {
i915_gem_context_set_user_engines(ctx);
@ -1652,8 +1666,6 @@ i915_gem_create_context(struct drm_i915_private *i915,
* is no remap info, it will be a NOP. */
ctx->remap_slice = ALL_L3_SLICES(i915);
ctx->user_flags = pc->user_flags;
for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;


@ -338,6 +338,7 @@ struct i915_gem_context {
#define UCONTEXT_BANNABLE 2
#define UCONTEXT_RECOVERABLE 3
#define UCONTEXT_PERSISTENCE 4
#define UCONTEXT_LOW_LATENCY 5
/**
* @flags: small set of booleans


@ -255,7 +255,6 @@ struct i915_execbuffer {
struct intel_context *context; /* logical state for the request */
struct i915_gem_context *gem_context; /** caller's context */
intel_wakeref_t wakeref;
intel_wakeref_t wakeref_gt0;
/** our requests to build */
struct i915_request *requests[MAX_ENGINE_INSTANCE + 1];
@ -2457,7 +2456,7 @@ static int eb_submit(struct i915_execbuffer *eb)
* The engine index is returned.
*/
static unsigned int
gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
gen8_dispatch_bsd_engine(struct drm_i915_private *i915,
struct drm_file *file)
{
struct drm_i915_file_private *file_priv = file->driver_priv;
@ -2465,7 +2464,7 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
/* Check whether the file_priv has already selected one ring. */
if ((int)file_priv->bsd_engine < 0)
file_priv->bsd_engine =
get_random_u32_below(dev_priv->engine_uabi_class_count[I915_ENGINE_CLASS_VIDEO]);
get_random_u32_below(i915->engine_uabi_class_count[I915_ENGINE_CLASS_VIDEO]);
return file_priv->bsd_engine;
}
@ -2686,7 +2685,6 @@ static int
eb_select_engine(struct i915_execbuffer *eb)
{
struct intel_context *ce, *child;
struct intel_gt *gt;
unsigned int idx;
int err;
@ -2710,17 +2708,10 @@ eb_select_engine(struct i915_execbuffer *eb)
}
}
eb->num_batches = ce->parallel.number_children + 1;
gt = ce->engine->gt;
for_each_child(ce, child)
intel_context_get(child);
eb->wakeref = intel_gt_pm_get(ce->engine->gt);
/*
* Keep GT0 active on MTL so that i915_vma_parked() doesn't
* free VMAs while execbuf ioctl is validating VMAs.
*/
if (gt->info.id)
eb->wakeref_gt0 = intel_gt_pm_get(to_gt(gt->i915));
if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
err = intel_context_alloc_state(ce);
@ -2759,9 +2750,6 @@ eb_select_engine(struct i915_execbuffer *eb)
return err;
err:
if (gt->info.id)
intel_gt_pm_put(to_gt(gt->i915), eb->wakeref_gt0);
intel_gt_pm_put(ce->engine->gt, eb->wakeref);
for_each_child(ce, child)
intel_context_put(child);
@ -2775,12 +2763,6 @@ eb_put_engine(struct i915_execbuffer *eb)
struct intel_context *child;
i915_vm_put(eb->context->vm);
/*
* This works in conjunction with eb_select_engine() to prevent
* i915_vma_parked() from interfering while execbuf validates vmas.
*/
if (eb->gt->info.id)
intel_gt_pm_put(to_gt(eb->gt->i915), eb->wakeref_gt0);
intel_gt_pm_put(eb->context->engine->gt, eb->wakeref);
for_each_child(eb->context, child)
intel_context_put(child);


@ -654,7 +654,7 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915,
/* Allocate a new GEM object and fill it with the supplied data */
struct drm_i915_gem_object *
i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv,
i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915,
const void *data, resource_size_t size)
{
struct drm_i915_gem_object *obj;
@ -663,8 +663,8 @@ i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv,
resource_size_t offset;
int err;
GEM_WARN_ON(IS_DGFX(dev_priv));
obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE));
GEM_WARN_ON(IS_DGFX(i915));
obj = i915_gem_object_create_shmem(i915, round_up(size, PAGE_SIZE));
if (IS_ERR(obj))
return obj;


@ -14,14 +14,14 @@ struct drm_i915_gem_object;
#define i915_stolen_fb drm_mm_node
int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv,
int i915_gem_stolen_insert_node(struct drm_i915_private *i915,
struct drm_mm_node *node, u64 size,
unsigned alignment);
int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv,
int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *i915,
struct drm_mm_node *node, u64 size,
unsigned alignment, u64 start,
u64 end);
void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv,
void i915_gem_stolen_remove_node(struct drm_i915_private *i915,
struct drm_mm_node *node);
struct intel_memory_region *
i915_gem_stolen_smem_setup(struct drm_i915_private *i915, u16 type,
@ -31,7 +31,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
u16 instance);
struct drm_i915_gem_object *
i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
i915_gem_object_create_stolen(struct drm_i915_private *i915,
resource_size_t size);
bool i915_gem_object_is_stolen(const struct drm_i915_gem_object *obj);


@ -343,12 +343,12 @@ int
i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_private *i915 = to_i915(dev);
struct drm_i915_gem_set_tiling *args = data;
struct drm_i915_gem_object *obj;
int err;
if (!to_gt(dev_priv)->ggtt->num_fences)
if (!to_gt(i915)->ggtt->num_fences)
return -EOPNOTSUPP;
obj = i915_gem_object_lookup(file, args->handle);
@ -374,9 +374,9 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
args->stride = 0;
} else {
if (args->tiling_mode == I915_TILING_X)
args->swizzle_mode = to_gt(dev_priv)->ggtt->bit_6_swizzle_x;
args->swizzle_mode = to_gt(i915)->ggtt->bit_6_swizzle_x;
else
args->swizzle_mode = to_gt(dev_priv)->ggtt->bit_6_swizzle_y;
args->swizzle_mode = to_gt(i915)->ggtt->bit_6_swizzle_y;
/* Hide bit 17 swizzling from the user. This prevents old Mesa
* from aborting the application on sw fallbacks to bit 17,
@ -427,11 +427,11 @@ i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
struct drm_i915_gem_get_tiling *args = data;
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_private *i915 = to_i915(dev);
struct drm_i915_gem_object *obj;
int err = -ENOENT;
if (!to_gt(dev_priv)->ggtt->num_fences)
if (!to_gt(i915)->ggtt->num_fences)
return -EOPNOTSUPP;
rcu_read_lock();
@ -447,10 +447,10 @@ i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
switch (args->tiling_mode) {
case I915_TILING_X:
args->swizzle_mode = to_gt(dev_priv)->ggtt->bit_6_swizzle_x;
args->swizzle_mode = to_gt(i915)->ggtt->bit_6_swizzle_x;
break;
case I915_TILING_Y:
args->swizzle_mode = to_gt(dev_priv)->ggtt->bit_6_swizzle_y;
args->swizzle_mode = to_gt(i915)->ggtt->bit_6_swizzle_y;
break;
default:
case I915_TILING_NONE:
@ -459,7 +459,7 @@ i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
}
/* Hide bit 17 from the user -- see comment in i915_gem_set_tiling */
if (dev_priv->gem_quirks & GEM_QUIRK_PIN_SWIZZLED_PAGES)
if (i915->gem_quirks & GEM_QUIRK_PIN_SWIZZLED_PAGES)
args->phys_swizzle_mode = I915_BIT_6_SWIZZLE_UNKNOWN;
else
args->phys_swizzle_mode = args->swizzle_mode;


@ -463,13 +463,13 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
struct drm_file *file)
{
static struct lock_class_key __maybe_unused lock_class;
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_private *i915 = to_i915(dev);
struct drm_i915_gem_userptr *args = data;
struct drm_i915_gem_object __maybe_unused *obj;
int __maybe_unused ret;
u32 __maybe_unused handle;
if (!HAS_LLC(dev_priv) && !HAS_SNOOP(dev_priv)) {
if (!HAS_LLC(i915) && !HAS_SNOOP(i915)) {
/* We cannot support coherent userptr objects on hw without
* LLC and broken snooping.
*/
@ -501,7 +501,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
* On almost all of the older hw, we cannot tell the GPU that
* a page is readonly.
*/
if (!to_gt(dev_priv)->vm->has_read_only)
if (!to_gt(i915)->vm->has_read_only)
return -ENODEV;
}


@ -1969,19 +1969,19 @@ int i915_gem_huge_page_mock_selftests(void)
SUBTEST(igt_mock_memory_region_huge_pages),
SUBTEST(igt_mock_ppgtt_misaligned_dma),
};
struct drm_i915_private *dev_priv;
struct drm_i915_private *i915;
struct i915_ppgtt *ppgtt;
int err;
dev_priv = mock_gem_device();
if (!dev_priv)
i915 = mock_gem_device();
if (!i915)
return -ENOMEM;
/* Pretend to be a device which supports the 48b PPGTT */
RUNTIME_INFO(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL;
RUNTIME_INFO(dev_priv)->ppgtt_size = 48;
RUNTIME_INFO(i915)->ppgtt_type = INTEL_PPGTT_FULL;
RUNTIME_INFO(i915)->ppgtt_size = 48;
ppgtt = i915_ppgtt_create(to_gt(dev_priv), 0);
ppgtt = i915_ppgtt_create(to_gt(i915), 0);
if (IS_ERR(ppgtt)) {
err = PTR_ERR(ppgtt);
goto out_unlock;
@ -2005,7 +2005,7 @@ int i915_gem_huge_page_mock_selftests(void)
out_put:
i915_vm_put(&ppgtt->vm);
out_unlock:
mock_destroy_device(dev_priv);
mock_destroy_device(i915);
return err;
}


@ -7,6 +7,7 @@
#include "i915_drv.h"
#include "i915_selftest.h"
#include "gem/i915_gem_context.h"
#include "gt/intel_gt.h"
#include "mock_context.h"
#include "mock_dmabuf.h"
@ -155,6 +156,7 @@ static int verify_access(struct drm_i915_private *i915,
struct file *file;
u32 *vaddr;
int err = 0, i;
unsigned int mode;
file = mock_file(i915);
if (IS_ERR(file))
@ -194,7 +196,8 @@ static int verify_access(struct drm_i915_private *i915,
if (err)
goto out_file;
vaddr = i915_gem_object_pin_map_unlocked(native_obj, I915_MAP_WB);
mode = intel_gt_coherent_map_type(to_gt(i915), native_obj, true);
vaddr = i915_gem_object_pin_map_unlocked(native_obj, mode);
if (IS_ERR(vaddr)) {
err = PTR_ERR(vaddr);
goto out_file;


@ -740,21 +740,25 @@ static u32 *gen12_emit_preempt_busywait(struct i915_request *rq, u32 *cs)
}
/* Wa_14014475959:dg2 */
#define CCS_SEMAPHORE_PPHWSP_OFFSET 0x540
static u32 ccs_semaphore_offset(struct i915_request *rq)
/* Wa_16019325821 */
/* Wa_14019159160 */
#define HOLD_SWITCHOUT_SEMAPHORE_PPHWSP_OFFSET 0x540
static u32 hold_switchout_semaphore_offset(struct i915_request *rq)
{
return i915_ggtt_offset(rq->context->state) +
(LRC_PPHWSP_PN * PAGE_SIZE) + CCS_SEMAPHORE_PPHWSP_OFFSET;
(LRC_PPHWSP_PN * PAGE_SIZE) + HOLD_SWITCHOUT_SEMAPHORE_PPHWSP_OFFSET;
}
/* Wa_14014475959:dg2 */
static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs)
/* Wa_16019325821 */
/* Wa_14019159160 */
static u32 *hold_switchout_emit_wa_busywait(struct i915_request *rq, u32 *cs)
{
int i;
*cs++ = MI_ATOMIC_INLINE | MI_ATOMIC_GLOBAL_GTT | MI_ATOMIC_CS_STALL |
MI_ATOMIC_MOVE;
*cs++ = ccs_semaphore_offset(rq);
*cs++ = hold_switchout_semaphore_offset(rq);
*cs++ = 0;
*cs++ = 1;
@ -770,7 +774,7 @@ static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs)
MI_SEMAPHORE_POLL |
MI_SEMAPHORE_SAD_EQ_SDD;
*cs++ = 0;
*cs++ = ccs_semaphore_offset(rq);
*cs++ = hold_switchout_semaphore_offset(rq);
*cs++ = 0;
return cs;
@ -787,8 +791,10 @@ gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
cs = gen12_emit_preempt_busywait(rq, cs);
/* Wa_14014475959:dg2 */
if (intel_engine_uses_wa_hold_ccs_switchout(rq->engine))
cs = ccs_emit_wa_busywait(rq, cs);
/* Wa_16019325821 */
/* Wa_14019159160 */
if (intel_engine_uses_wa_hold_switchout(rq->engine))
cs = hold_switchout_emit_wa_busywait(rq, cs);
rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail);


@ -130,6 +130,7 @@ struct intel_context {
#define CONTEXT_PERMA_PIN 11
#define CONTEXT_IS_PARKING 12
#define CONTEXT_EXITING 13
#define CONTEXT_LOW_LATENCY 14
struct {
u64 timeout_us;


@ -588,7 +588,7 @@ u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value)
* NB: The GuC API only supports 32bit values. However, the limit is further
* reduced due to internal calculations which would otherwise overflow.
*/
if (intel_guc_submission_is_wanted(&engine->gt->uc.guc))
if (intel_guc_submission_is_wanted(gt_to_guc(engine->gt)))
value = min_t(u64, value, guc_policy_max_preempt_timeout_ms());
value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
@ -609,7 +609,7 @@ u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 value)
* NB: The GuC API only supports 32bit values. However, the limit is further
* reduced due to internal calculations which would otherwise overflow.
*/
if (intel_guc_submission_is_wanted(&engine->gt->uc.guc))
if (intel_guc_submission_is_wanted(gt_to_guc(engine->gt)))
value = min_t(u64, value, guc_policy_max_exec_quantum_ms());
value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
@ -678,7 +678,7 @@ void intel_engines_release(struct intel_gt *gt)
*/
GEM_BUG_ON(intel_gt_pm_is_awake(gt));
if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
__intel_gt_reset(gt, ALL_ENGINES);
intel_gt_reset_all_engines(gt);
/* Decouple the backend; but keep the layout for late GPU resets */
for_each_engine(engine, gt, id) {


@ -586,7 +586,7 @@ struct intel_engine_cs {
#define I915_ENGINE_HAS_RCS_REG_STATE BIT(9)
#define I915_ENGINE_HAS_EU_PRIORITY BIT(10)
#define I915_ENGINE_FIRST_RENDER_COMPUTE BIT(11)
#define I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT BIT(12)
#define I915_ENGINE_USES_WA_HOLD_SWITCHOUT BIT(12)
unsigned int flags;
/*
@ -696,10 +696,12 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
}
/* Wa_14014475959:dg2 */
/* Wa_16019325821 */
/* Wa_14019159160 */
static inline bool
intel_engine_uses_wa_hold_ccs_switchout(struct intel_engine_cs *engine)
intel_engine_uses_wa_hold_switchout(struct intel_engine_cs *engine)
{
return engine->flags & I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
return engine->flags & I915_ENGINE_USES_WA_HOLD_SWITCHOUT;
}
#endif /* __INTEL_ENGINE_TYPES_H__ */


@ -2898,7 +2898,7 @@ static void enable_error_interrupt(struct intel_engine_cs *engine)
drm_err(&engine->i915->drm,
"engine '%s' resumed still in error: %08x\n",
engine->name, status);
__intel_gt_reset(engine->gt, engine->mask);
intel_gt_reset_engine(engine);
}
/*


@ -231,11 +231,8 @@ static void guc_ggtt_ct_invalidate(struct intel_gt *gt)
struct intel_uncore *uncore = gt->uncore;
intel_wakeref_t wakeref;
with_intel_runtime_pm_if_active(uncore->rpm, wakeref) {
struct intel_guc *guc = &gt->uc.guc;
intel_guc_invalidate_tlb_guc(guc);
}
with_intel_runtime_pm_if_active(uncore->rpm, wakeref)
intel_guc_invalidate_tlb_guc(gt_to_guc(gt));
}
static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
@ -246,7 +243,7 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
gen8_ggtt_invalidate(ggtt);
list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) {
if (intel_guc_tlb_invalidation_is_available(&gt->uc.guc))
if (intel_guc_tlb_invalidation_is_available(gt_to_guc(gt)))
guc_ggtt_ct_invalidate(gt);
else if (GRAPHICS_VER(i915) >= 12)
intel_uncore_write_fw(gt->uncore,


@ -832,7 +832,7 @@ void intel_gt_driver_unregister(struct intel_gt *gt)
/* Scrub all HW state upon release */
with_intel_runtime_pm(gt->uncore->rpm, wakeref)
__intel_gt_reset(gt, ALL_ENGINES);
intel_gt_reset_all_engines(gt);
}
void intel_gt_driver_release(struct intel_gt *gt)


@ -124,6 +124,11 @@ static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc)
return guc_to_gt(guc)->i915;
}
static inline struct intel_guc *gt_to_guc(struct intel_gt *gt)
{
return &gt->uc.guc;
}
void intel_gt_common_init_early(struct intel_gt *gt);
int intel_root_gt_init_early(struct drm_i915_private *i915);
int intel_gt_assign_ggtt(struct intel_gt *gt);


@ -68,9 +68,9 @@ gen11_other_irq_handler(struct intel_gt *gt, const u8 instance,
struct intel_gt *media_gt = gt->i915->media_gt;
if (instance == OTHER_GUC_INSTANCE)
return guc_irq_handler(&gt->uc.guc, iir);
return guc_irq_handler(gt_to_guc(gt), iir);
if (instance == OTHER_MEDIA_GUC_INSTANCE && media_gt)
return guc_irq_handler(&media_gt->uc.guc, iir);
return guc_irq_handler(gt_to_guc(media_gt), iir);
if (instance == OTHER_GTPM_INSTANCE)
return gen11_rps_irq_handler(&gt->rps, iir);
@ -442,7 +442,7 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl)
iir = raw_reg_read(regs, GEN8_GT_IIR(2));
if (likely(iir)) {
gen6_rps_irq_handler(&gt->rps, iir);
guc_irq_handler(&gt->uc.guc, iir >> 16);
guc_irq_handler(gt_to_guc(gt), iir >> 16);
raw_reg_write(regs, GEN8_GT_IIR(2), iir);
}
}


@ -159,7 +159,7 @@ static bool reset_engines(struct intel_gt *gt)
if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
return false;
return __intel_gt_reset(gt, ALL_ENGINES) == 0;
return intel_gt_reset_all_engines(gt) == 0;
}
static void gt_sanitize(struct intel_gt *gt, bool force)


@ -534,7 +534,7 @@ static bool rps_eval(void *data)
{
struct intel_gt *gt = data;
if (intel_guc_slpc_is_used(&gt->uc.guc))
if (intel_guc_slpc_is_used(gt_to_guc(gt)))
return false;
else
return HAS_RPS(gt->i915);


@ -1161,6 +1161,7 @@
#define GEN12_DISABLE_EARLY_READ REG_BIT(14)
#define GEN12_ENABLE_LARGE_GRF_MODE REG_BIT(12)
#define GEN12_PUSH_CONST_DEREF_HOLD_DIS REG_BIT(8)
#define XELPG_DISABLE_TDL_SVHS_GATING REG_BIT(1)
#define GEN12_DISABLE_DOP_GATING REG_BIT(0)
#define RT_CTRL MCR_REG(0xe530)


@ -442,7 +442,7 @@ static ssize_t slpc_ignore_eff_freq_show(struct kobject *kobj,
char *buff)
{
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
struct intel_guc_slpc *slpc = &gt_to_guc(gt)->slpc;
return sysfs_emit(buff, "%u\n", slpc->ignore_eff_freq);
}
@ -452,7 +452,7 @@ static ssize_t slpc_ignore_eff_freq_store(struct kobject *kobj,
const char *buff, size_t count)
{
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
struct intel_guc_slpc *slpc = &gt_to_guc(gt)->slpc;
int err;
u32 val;
@ -595,7 +595,7 @@ static ssize_t media_freq_factor_store(struct kobject *kobj,
const char *buff, size_t count)
{
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
struct intel_guc_slpc *slpc = &gt_to_guc(gt)->slpc;
u32 factor, mode;
int err;


@ -109,7 +109,7 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
* thus allowing GuC to control RC6 entry/exit fully instead.
* We will not set the HW ENABLE and EI bits
*/
if (!intel_guc_rc_enable(&gt->uc.guc))
if (!intel_guc_rc_enable(gt_to_guc(gt)))
rc6->ctl_enable = GEN6_RC_CTL_RC6_ENABLE;
else
rc6->ctl_enable =
@ -569,7 +569,7 @@ static void __intel_rc6_disable(struct intel_rc6 *rc6)
struct intel_gt *gt = rc6_to_gt(rc6);
/* Take control of RC6 back from GuC */
intel_guc_rc_disable(&gt->uc.guc);
intel_guc_rc_disable(gt_to_guc(gt));
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
if (GRAPHICS_VER(i915) >= 9)


@ -764,7 +764,7 @@ wa_14015076503_end(struct intel_gt *gt, intel_engine_mask_t engine_mask)
HECI_H_GS1_ER_PREP, 0);
}
int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
static int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
{
const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1;
reset_func reset;
@ -879,8 +879,17 @@ static intel_engine_mask_t reset_prepare(struct intel_gt *gt)
intel_engine_mask_t awake = 0;
enum intel_engine_id id;
/* For GuC mode, ensure submission is disabled before stopping ring */
intel_uc_reset_prepare(&gt->uc);
/**
* For GuC mode with submission enabled, ensure submission
* is disabled before stopping ring.
*
* For GuC mode with submission disabled, ensure that GuC is not
* sanitized, do that after engine reset. reset_prepare()
* is followed by engine reset which in this mode requires GuC to
* process any CSB FIFO entries generated by the resets.
*/
if (intel_uc_uses_guc_submission(&gt->uc))
intel_uc_reset_prepare(&gt->uc);
for_each_engine(engine, gt, id) {
if (intel_engine_pm_get_if_awake(engine))
@ -978,7 +987,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
/* Even if the GPU reset fails, it should still stop the engines */
if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
__intel_gt_reset(gt, ALL_ENGINES);
intel_gt_reset_all_engines(gt);
for_each_engine(engine, gt, id)
engine->submit_request = nop_submit_request;
@ -1089,7 +1098,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
/* We must reset pending GPU events before restoring our submission */
ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */
if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
ok = __intel_gt_reset(gt, ALL_ENGINES) == 0;
ok = intel_gt_reset_all_engines(gt) == 0;
if (!ok) {
/*
* Warn CI about the unrecoverable wedged condition.
@ -1133,10 +1142,10 @@ static int do_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
{
int err, i;
err = __intel_gt_reset(gt, ALL_ENGINES);
err = intel_gt_reset_all_engines(gt);
for (i = 0; err && i < RESET_MAX_RETRIES; i++) {
msleep(10 * (i + 1));
err = __intel_gt_reset(gt, ALL_ENGINES);
err = intel_gt_reset_all_engines(gt);
}
if (err)
return err;
@ -1227,6 +1236,9 @@ void intel_gt_reset(struct intel_gt *gt,
intel_overlay_reset(gt->i915);
/* sanitize uC after engine reset */
if (!intel_uc_uses_guc_submission(&gt->uc))
intel_uc_reset_prepare(&gt->uc);
/*
* Next we need to restore the context, but we don't use those
* yet either...
@ -1270,7 +1282,30 @@ void intel_gt_reset(struct intel_gt *gt,
goto finish;
}
static int intel_gt_reset_engine(struct intel_engine_cs *engine)
/**
* intel_gt_reset_all_engines() - Reset all engines in the given gt.
* @gt: the GT to reset all engines for.
*
* This function resets all engines within the given gt.
*
* Returns:
* Zero on success, negative error code on failure.
*/
int intel_gt_reset_all_engines(struct intel_gt *gt)
{
return __intel_gt_reset(gt, ALL_ENGINES);
}
/**
* intel_gt_reset_engine() - Reset a specific engine within a gt.
* @engine: engine to be reset.
*
* This function resets the specified engine within a gt.
*
* Returns:
* Zero on success, negative error code on failure.
*/
int intel_gt_reset_engine(struct intel_engine_cs *engine)
{
return __intel_gt_reset(engine->gt, engine->mask);
}


@ -54,7 +54,8 @@ int intel_gt_terminally_wedged(struct intel_gt *gt);
void intel_gt_set_wedged_on_init(struct intel_gt *gt);
void intel_gt_set_wedged_on_fini(struct intel_gt *gt);
int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask);
int intel_gt_reset_engine(struct intel_engine_cs *engine);
int intel_gt_reset_all_engines(struct intel_gt *gt);
int intel_reset_guc(struct intel_gt *gt);


@ -52,7 +52,7 @@ static struct intel_guc_slpc *rps_to_slpc(struct intel_rps *rps)
{
struct intel_gt *gt = rps_to_gt(rps);
return &gt->uc.guc.slpc;
return &gt_to_guc(gt)->slpc;
}
static bool rps_uses_slpc(struct intel_rps *rps)
@ -1013,6 +1013,10 @@ void intel_rps_boost(struct i915_request *rq)
if (i915_request_signaled(rq) || i915_request_has_waitboost(rq))
return;
/* Waitboost is not needed for contexts marked with a Freq hint */
if (test_bit(CONTEXT_LOW_LATENCY, &rq->context->flags))
return;
/* Serializes with i915_request_retire() */
if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) {
struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;


@ -132,7 +132,7 @@ void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
return;
with_intel_gt_pm_if_awake(gt, wakeref) {
struct intel_guc *guc = &gt->uc.guc;
struct intel_guc *guc = gt_to_guc(gt);
mutex_lock(&gt->tlb.invalidate_lock);
if (tlb_seqno_passed(gt, seqno))


@ -2760,10 +2760,14 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) ||
IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER) ||
IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 74), IP_VER(12, 74)))
IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 74), IP_VER(12, 74))) {
/* Wa_14017856879 */
wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN3, MTL_DISABLE_FIX_FOR_EOT_FLUSH);
/* Wa_14020495402 */
wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, XELPG_DISABLE_TDL_SVHS_GATING);
}
if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))
/*
@ -2800,9 +2804,6 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
/* Wa_14015227452:dg2,pvc */
wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
/* Wa_16015675438:dg2,pvc */
wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE);
/*
* Wa_16011620976:dg2_g11
* Wa_22015475538:dg2


@ -319,7 +319,7 @@ static int igt_hang_sanitycheck(void *arg)
i915_request_add(rq);
timeout = 0;
intel_wedge_on_timeout(&w, gt, HZ / 10 /* 100ms */)
intel_wedge_on_timeout(&w, gt, HZ / 5 /* 200ms */)
timeout = i915_request_wait(rq, 0,
MAX_SCHEDULE_TIMEOUT);
if (intel_gt_is_wedged(gt))


@ -281,7 +281,7 @@ static int igt_atomic_reset(void *arg)
awake = reset_prepare(gt);
p->critical_section_begin();
err = __intel_gt_reset(gt, ALL_ENGINES);
err = intel_gt_reset_all_engines(gt);
p->critical_section_end();
reset_finish(gt, awake);


@ -53,7 +53,7 @@ static int slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 freq)
static int slpc_set_freq(struct intel_gt *gt, u32 freq)
{
int err;
struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
struct intel_guc_slpc *slpc = &gt_to_guc(gt)->slpc;
err = slpc_set_max_freq(slpc, freq);
if (err) {
@ -182,7 +182,7 @@ static int vary_min_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps,
static int slpc_power(struct intel_gt *gt, struct intel_engine_cs *engine)
{
struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
struct intel_guc_slpc *slpc = &gt_to_guc(gt)->slpc;
struct {
u64 power;
int freq;
@ -262,7 +262,7 @@ static int max_granted_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps,
static int run_test(struct intel_gt *gt, int test_type)
{
struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
struct intel_guc_slpc *slpc = &gt_to_guc(gt)->slpc;
struct intel_rps *rps = &gt->rps;
struct intel_engine_cs *engine;
enum intel_engine_id id;


@ -207,6 +207,27 @@ struct slpc_shared_data {
u8 reserved_mode_definition[4096];
} __packed;
struct slpc_context_frequency_request {
u32 frequency_request:16;
u32 reserved:12;
u32 is_compute:1;
u32 ignore_busyness:1;
u32 is_minimum:1;
u32 is_predefined:1;
} __packed;
#define SLPC_CTX_FREQ_REQ_IS_COMPUTE REG_BIT(28)
struct slpc_optimized_strategies {
u32 compute:1;
u32 async_flip:1;
u32 media:1;
u32 vsync_flip:1;
u32 reserved:28;
} __packed;
#define SLPC_OPTIMIZED_STRATEGY_COMPUTE REG_BIT(0)
/**
* DOC: SLPC H2G MESSAGE FORMAT
*


@ -36,6 +36,7 @@ enum intel_guc_load_status {
INTEL_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_START,
INTEL_GUC_LOAD_STATUS_MPU_DATA_INVALID = 0x73,
INTEL_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID = 0x74,
INTEL_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR = 0x75,
INTEL_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_END,
INTEL_GUC_LOAD_STATUS_READY = 0xF0,


@ -101,4 +101,11 @@ enum {
GUC_CONTEXT_POLICIES_KLV_NUM_IDS = 5,
};
/*
* Workaround keys:
*/
enum {
GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE = 0x9001,
};
#endif /* _ABI_GUC_KLVS_ABI_H */


@ -298,7 +298,7 @@ static int gsc_fw_load_prepare(struct intel_gsc_uc *gsc)
memcpy_toio(gsc->local_vaddr, src, gsc->fw.size);
memset_io(gsc->local_vaddr + gsc->fw.size, 0, gsc->local->size - gsc->fw.size);
intel_guc_write_barrier(&gt->uc.guc);
intel_guc_write_barrier(gt_to_guc(gt));
i915_gem_object_unpin_map(gsc->fw.obj);
@ -351,7 +351,7 @@ static int gsc_fw_query_compatibility_version(struct intel_gsc_uc *gsc)
void *vaddr;
int err;
err = intel_guc_allocate_and_map_vma(&gt->uc.guc, GSC_VER_PKT_SZ * 2,
err = intel_guc_allocate_and_map_vma(gt_to_guc(gt), GSC_VER_PKT_SZ * 2,
&vma, &vaddr);
if (err) {
gt_err(gt, "failed to allocate vma for GSC version query\n");


@ -358,7 +358,8 @@ static int proxy_channel_alloc(struct intel_gsc_uc *gsc)
void *vaddr;
int err;
err = intel_guc_allocate_and_map_vma(&gt->uc.guc, GSC_PROXY_CHANNEL_SIZE,
err = intel_guc_allocate_and_map_vma(gt_to_guc(gt),
GSC_PROXY_CHANNEL_SIZE,
&vma, &vaddr);
if (err)
return err;


@ -294,6 +294,11 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
IS_DG2(gt->i915))
flags |= GUC_WA_HOLD_CCS_SWITCHOUT;
/* Wa_16019325821 */
/* Wa_14019159160 */
if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)))
flags |= GUC_WA_RCS_CCS_SWITCHOUT;
/*
* Wa_14012197797
* Wa_22011391025
@ -315,11 +320,12 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
if (IS_DG2_G11(gt->i915))
flags |= GUC_WA_CONTEXT_ISOLATION;
/* Wa_14018913170 */
if (GUC_FIRMWARE_VER(guc) >= MAKE_GUC_VER(70, 7, 0)) {
if (IS_DG2(gt->i915) || IS_METEORLAKE(gt->i915))
flags |= GUC_WA_ENABLE_TSC_CHECK_ON_RC6;
}
/*
* Wa_14018913170: Applicable to all platforms supported by i915 so
* don't bother testing for all X/Y/Z platforms explicitly.
*/
if (GUC_FIRMWARE_VER(guc) >= MAKE_GUC_VER(70, 7, 0))
flags |= GUC_WA_ENABLE_TSC_CHECK_ON_RC6;
return flags;
}


@ -204,6 +204,8 @@ struct intel_guc {
struct guc_mmio_reg *ads_regset;
/** @ads_golden_ctxt_size: size of the golden contexts in the ADS */
u32 ads_golden_ctxt_size;
/** @ads_waklv_size: size of workaround KLVs */
u32 ads_waklv_size;
/** @ads_capture_size: size of register lists in the ADS used for error capture */
u32 ads_capture_size;


@ -46,6 +46,10 @@
* +---------------------------------------+
* | padding |
* +---------------------------------------+ <== 4K aligned
* | w/a KLVs |
* +---------------------------------------+
* | padding |
* +---------------------------------------+ <== 4K aligned
* | capture lists |
* +---------------------------------------+
* | padding |
@ -88,6 +92,11 @@ static u32 guc_ads_golden_ctxt_size(struct intel_guc *guc)
return PAGE_ALIGN(guc->ads_golden_ctxt_size);
}
static u32 guc_ads_waklv_size(struct intel_guc *guc)
{
return PAGE_ALIGN(guc->ads_waklv_size);
}
static u32 guc_ads_capture_size(struct intel_guc *guc)
{
return PAGE_ALIGN(guc->ads_capture_size);
@ -113,7 +122,7 @@ static u32 guc_ads_golden_ctxt_offset(struct intel_guc *guc)
return PAGE_ALIGN(offset);
}
static u32 guc_ads_capture_offset(struct intel_guc *guc)
static u32 guc_ads_waklv_offset(struct intel_guc *guc)
{
u32 offset;
@ -123,6 +132,16 @@ static u32 guc_ads_capture_offset(struct intel_guc *guc)
return PAGE_ALIGN(offset);
}
static u32 guc_ads_capture_offset(struct intel_guc *guc)
{
u32 offset;
offset = guc_ads_waklv_offset(guc) +
guc_ads_waklv_size(guc);
return PAGE_ALIGN(offset);
}
static u32 guc_ads_private_data_offset(struct intel_guc *guc)
{
u32 offset;
@ -796,6 +815,65 @@ guc_capture_prep_lists(struct intel_guc *guc)
return PAGE_ALIGN(total_size);
}
/* Wa_14019159160 */
static u32 guc_waklv_ra_mode(struct intel_guc *guc, u32 offset, u32 remain)
{
u32 size;
u32 klv_entry[] = {
/* 16:16 key/length */
FIELD_PREP(GUC_KLV_0_KEY, GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE) |
FIELD_PREP(GUC_KLV_0_LEN, 0),
/* 0 dwords data */
};
size = sizeof(klv_entry);
GEM_BUG_ON(remain < size);
iosys_map_memcpy_to(&guc->ads_map, offset, klv_entry, size);
return size;
}
static void guc_waklv_init(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
u32 offset, addr_ggtt, remain, size;
if (!intel_uc_uses_guc_submission(&gt->uc))
return;
if (GUC_FIRMWARE_VER(guc) < MAKE_GUC_VER(70, 10, 0))
return;
GEM_BUG_ON(iosys_map_is_null(&guc->ads_map));
offset = guc_ads_waklv_offset(guc);
remain = guc_ads_waklv_size(guc);
/* Wa_14019159160 */
if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) {
size = guc_waklv_ra_mode(guc, offset, remain);
offset += size;
remain -= size;
}
size = guc_ads_waklv_size(guc) - remain;
if (!size)
return;
offset = guc_ads_waklv_offset(guc);
addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
ads_blob_write(guc, ads.wa_klv_addr_lo, addr_ggtt);
ads_blob_write(guc, ads.wa_klv_addr_hi, 0);
ads_blob_write(guc, ads.wa_klv_size, size);
}
static int guc_prep_waklv(struct intel_guc *guc)
{
/* Fudge something chunky for now: */
return PAGE_SIZE;
}
static void __guc_ads_init(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
@ -843,6 +921,9 @@ static void __guc_ads_init(struct intel_guc *guc)
/* MMIO save/restore list */
guc_mmio_reg_state_init(guc);
/* Workaround KLV list */
guc_waklv_init(guc);
/* Private Data */
ads_blob_write(guc, ads.private_data, base +
guc_ads_private_data_offset(guc));
@ -886,6 +967,12 @@ int intel_guc_ads_create(struct intel_guc *guc)
return ret;
guc->ads_capture_size = ret;
/* And don't forget the workaround KLVs: */
ret = guc_prep_waklv(guc);
if (ret < 0)
return ret;
guc->ads_waklv_size = ret;
/* Now the total size can be determined: */
size = guc_ads_blob_size(guc);
@ -961,7 +1048,7 @@ u32 intel_guc_engine_usage_offset(struct intel_guc *guc)
struct iosys_map intel_guc_engine_usage_record_map(struct intel_engine_cs *engine)
{
struct intel_guc *guc = &engine->gt->uc.guc;
struct intel_guc *guc = gt_to_guc(engine->gt);
u8 guc_class = engine_class_to_guc_class(engine->class);
size_t offset = offsetof(struct __guc_ads_blob,
engine_usage.engines[guc_class][ilog2(engine->logical_mask)]);


@ -51,6 +51,7 @@
{ RING_ESR(0), 0, 0, "ESR" }, \
{ RING_DMA_FADD(0), 0, 0, "RING_DMA_FADD_LDW" }, \
{ RING_DMA_FADD_UDW(0), 0, 0, "RING_DMA_FADD_UDW" }, \
{ RING_EIR(0), 0, 0, "EIR" }, \
{ RING_IPEIR(0), 0, 0, "IPEIR" }, \
{ RING_IPEHR(0), 0, 0, "IPEHR" }, \
{ RING_INSTPS(0), 0, 0, "INSTPS" }, \
@ -80,9 +81,6 @@
{ GEN8_RING_PDP_LDW(0, 3), 0, 0, "PDP3_LDW" }, \
{ GEN8_RING_PDP_UDW(0, 3), 0, 0, "PDP3_UDW" }
#define COMMON_BASE_HAS_EU \
{ EIR, 0, 0, "EIR" }
#define COMMON_BASE_RENDER \
{ GEN7_SC_INSTDONE, 0, 0, "GEN7_SC_INSTDONE" }
@ -105,7 +103,6 @@ static const struct __guc_mmio_reg_descr xe_lp_global_regs[] = {
/* XE_LP Render / Compute Per-Class */
static const struct __guc_mmio_reg_descr xe_lp_rc_class_regs[] = {
COMMON_BASE_HAS_EU,
COMMON_BASE_RENDER,
COMMON_GEN12BASE_RENDER,
};
@ -148,7 +145,6 @@ static const struct __guc_mmio_reg_descr gen8_global_regs[] = {
};
static const struct __guc_mmio_reg_descr gen8_rc_class_regs[] = {
COMMON_BASE_HAS_EU,
COMMON_BASE_RENDER,
};
@ -1441,7 +1437,7 @@ int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *ebuf,
if (!cap || !ee->engine)
return -ENODEV;
guc = &ee->engine->gt->uc.guc;
guc = gt_to_guc(ee->engine->gt);
i915_error_printf(ebuf, "global --- GuC Error Capture on %s command stream:\n",
ee->engine->name);
@ -1543,7 +1539,7 @@ bool intel_guc_capture_is_matching_engine(struct intel_gt *gt,
if (!gt || !ce || !engine)
return false;
guc = &gt->uc.guc;
guc = gt_to_guc(gt);
if (!guc->capture)
return false;
@ -1573,7 +1569,7 @@ void intel_guc_capture_get_matching_node(struct intel_gt *gt,
if (!gt || !ee || !ce)
return;
guc = &gt->uc.guc;
guc = gt_to_guc(gt);
if (!guc->capture)
return;


@ -115,6 +115,7 @@ static inline bool guc_load_done(struct intel_uncore *uncore, u32 *status, bool
case INTEL_GUC_LOAD_STATUS_INIT_DATA_INVALID:
case INTEL_GUC_LOAD_STATUS_MPU_DATA_INVALID:
case INTEL_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID:
case INTEL_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR:
*success = false;
return true;
}
@ -241,6 +242,11 @@ static int guc_wait_ucode(struct intel_guc *guc)
ret = -EPERM;
break;
case INTEL_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR:
guc_info(guc, "invalid w/a KLV entry\n");
ret = -EINVAL;
break;
case INTEL_GUC_LOAD_STATUS_HWCONFIG_START:
guc_info(guc, "still extracting hwconfig table.\n");
ret = -ETIMEDOUT;


@ -96,8 +96,9 @@
#define GUC_WA_GAM_CREDITS BIT(10)
#define GUC_WA_DUAL_QUEUE BIT(11)
#define GUC_WA_RCS_RESET_BEFORE_RC6 BIT(13)
#define GUC_WA_CONTEXT_ISOLATION BIT(15)
#define GUC_WA_PRE_PARSER BIT(14)
#define GUC_WA_CONTEXT_ISOLATION BIT(15)
#define GUC_WA_RCS_CCS_SWITCHOUT BIT(16)
#define GUC_WA_HOLD_CCS_SWITCHOUT BIT(17)
#define GUC_WA_POLLCS BIT(18)
#define GUC_WA_RCS_REGS_IN_CCS_REGS_LIST BIT(21)
@ -430,7 +431,10 @@ struct guc_ads {
u32 capture_instance[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES];
u32 capture_class[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES];
u32 capture_global[GUC_CAPTURE_LIST_INDEX_MAX];
u32 reserved[14];
u32 wa_klv_addr_lo;
u32 wa_klv_addr_hi;
u32 wa_klv_size;
u32 reserved[11];
} __packed;
/* Engine usage stats */


@ -111,7 +111,7 @@ static bool has_table(struct drm_i915_private *i915)
static int guc_hwconfig_init(struct intel_gt *gt)
{
struct intel_hwconfig *hwconfig = &gt->info.hwconfig;
struct intel_guc *guc = &gt->uc.guc;
struct intel_guc *guc = gt_to_guc(gt);
int ret;
if (!has_table(gt->i915))


@ -537,6 +537,20 @@ int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val)
return ret;
}
int intel_guc_slpc_set_strategy(struct intel_guc_slpc *slpc, u32 val)
{
struct drm_i915_private *i915 = slpc_to_i915(slpc);
intel_wakeref_t wakeref;
int ret = 0;
with_intel_runtime_pm(&i915->runtime_pm, wakeref)
ret = slpc_set_param(slpc,
SLPC_PARAM_STRATEGIES,
val);
return ret;
}
int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val)
{
struct drm_i915_private *i915 = slpc_to_i915(slpc);
@ -711,6 +725,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
/* Set cached media freq ratio mode */
intel_guc_slpc_set_media_ratio_mode(slpc, slpc->media_ratio_mode);
/* Enable SLPC Optimized Strategy for compute */
intel_guc_slpc_set_strategy(slpc, SLPC_OPTIMIZED_STRATEGY_COMPUTE);
return 0;
}


@ -45,5 +45,6 @@ void intel_guc_pm_intrmsk_enable(struct intel_gt *gt);
void intel_guc_slpc_boost(struct intel_guc_slpc *slpc);
void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc);
int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val);
int intel_guc_slpc_set_strategy(struct intel_guc_slpc *slpc, u32 val);
#endif


@ -398,7 +398,7 @@ static inline void set_context_guc_id_invalid(struct intel_context *ce)
static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
{
return &ce->engine->gt->uc.guc;
return gt_to_guc(ce->engine->gt);
}
static inline struct i915_priolist *to_priolist(struct rb_node *rb)
@ -1246,7 +1246,7 @@ static void __get_engine_usage_record(struct intel_engine_cs *engine,
static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
{
struct intel_engine_guc_stats *stats = &engine->stats.guc;
struct intel_guc *guc = &engine->gt->uc.guc;
struct intel_guc *guc = gt_to_guc(engine->gt);
u32 last_switch, ctx_id, total;
lockdep_assert_held(&guc->timestamp.lock);
@ -1311,7 +1311,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc;
struct i915_gpu_error *gpu_error = &engine->i915->gpu_error;
struct intel_gt *gt = engine->gt;
struct intel_guc *guc = &gt->uc.guc;
struct intel_guc *guc = gt_to_guc(gt);
u64 total, gt_stamp_saved;
unsigned long flags;
u32 reset_count;
@ -1577,7 +1577,7 @@ static void guc_fini_engine_stats(struct intel_guc *guc)
void intel_guc_busyness_park(struct intel_gt *gt)
{
struct intel_guc *guc = &gt->uc.guc;
struct intel_guc *guc = gt_to_guc(gt);
if (!guc_submission_initialized(guc))
return;
@ -1604,7 +1604,7 @@ void intel_guc_busyness_park(struct intel_gt *gt)
void intel_guc_busyness_unpark(struct intel_gt *gt)
{
struct intel_guc *guc = &gt->uc.guc;
struct intel_guc *guc = gt_to_guc(gt);
unsigned long flags;
ktime_t unused;
@ -2189,7 +2189,7 @@ static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq)
static void guc_submit_request(struct i915_request *rq)
{
struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
struct intel_guc *guc = &rq->engine->gt->uc.guc;
struct intel_guc *guc = gt_to_guc(rq->engine->gt);
unsigned long flags;
/* Will be called from irq-context when using foreign fences. */
@ -2215,11 +2215,10 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
order_base_2(ce->parallel.number_children
+ 1));
else
ret = ida_simple_get(&guc->submission_state.guc_ids,
NUMBER_MULTI_LRC_GUC_ID(guc),
guc->submission_state.num_guc_ids,
GFP_KERNEL | __GFP_RETRY_MAYFAIL |
__GFP_NOWARN);
ret = ida_alloc_range(&guc->submission_state.guc_ids,
NUMBER_MULTI_LRC_GUC_ID(guc),
guc->submission_state.num_guc_ids - 1,
GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
if (unlikely(ret < 0))
return ret;
@ -2242,8 +2241,8 @@ static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
+ 1));
} else {
--guc->submission_state.guc_ids_in_use;
ida_simple_remove(&guc->submission_state.guc_ids,
ce->guc_id.id);
ida_free(&guc->submission_state.guc_ids,
ce->guc_id.id);
}
clr_ctx_id_mapping(guc, ce->guc_id.id);
set_context_guc_id_invalid(ce);
@ -2640,6 +2639,7 @@ MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY)
MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY)
MAKE_CONTEXT_POLICY_ADD(slpc_ctx_freq_req, SLPM_GT_FREQUENCY)
#undef MAKE_CONTEXT_POLICY_ADD
@ -2655,10 +2655,11 @@ static int __guc_context_set_context_policies(struct intel_guc *guc,
static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
{
struct intel_engine_cs *engine = ce->engine;
struct intel_guc *guc = &engine->gt->uc.guc;
struct intel_guc *guc = gt_to_guc(engine->gt);
struct context_policy policy;
u32 execution_quantum;
u32 preemption_timeout;
u32 slpc_ctx_freq_req = 0;
unsigned long flags;
int ret;
@ -2670,11 +2671,15 @@ static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
execution_quantum = engine->props.timeslice_duration_ms * 1000;
preemption_timeout = engine->props.preempt_timeout_ms * 1000;
if (ce->flags & BIT(CONTEXT_LOW_LATENCY))
slpc_ctx_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;
__guc_context_policy_start_klv(&policy, ce->guc_id.id);
__guc_context_policy_add_priority(&policy, ce->guc_state.prio);
__guc_context_policy_add_execution_quantum(&policy, execution_quantum);
__guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
__guc_context_policy_add_slpc_ctx_freq_req(&policy, slpc_ctx_freq_req);
if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
__guc_context_policy_add_preempt_to_idle(&policy, 1);
@ -2731,7 +2736,7 @@ static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
static void prepare_context_registration_info_v69(struct intel_context *ce)
{
struct intel_engine_cs *engine = ce->engine;
struct intel_guc *guc = &engine->gt->uc.guc;
struct intel_guc *guc = gt_to_guc(engine->gt);
u32 ctx_id = ce->guc_id.id;
struct guc_lrc_desc_v69 *desc;
struct intel_context *child;
@ -2800,7 +2805,7 @@ static void prepare_context_registration_info_v70(struct intel_context *ce,
struct guc_ctxt_registration_info *info)
{
struct intel_engine_cs *engine = ce->engine;
struct intel_guc *guc = &engine->gt->uc.guc;
struct intel_guc *guc = gt_to_guc(engine->gt);
u32 ctx_id = ce->guc_id.id;
GEM_BUG_ON(!engine->mask);
@ -2863,7 +2868,7 @@ static int try_context_registration(struct intel_context *ce, bool loop)
{
struct intel_engine_cs *engine = ce->engine;
struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
struct intel_guc *guc = &engine->gt->uc.guc;
struct intel_guc *guc = gt_to_guc(engine->gt);
intel_wakeref_t wakeref;
u32 ctx_id = ce->guc_id.id;
bool context_registered;
@ -4491,7 +4496,13 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
if (engine->class == COMPUTE_CLASS)
if (IS_GFX_GT_IP_STEP(engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
IS_DG2(engine->i915))
engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
engine->flags |= I915_ENGINE_USES_WA_HOLD_SWITCHOUT;
/* Wa_16019325821 */
/* Wa_14019159160 */
if ((engine->class == COMPUTE_CLASS || engine->class == RENDER_CLASS) &&
IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 71)))
engine->flags |= I915_ENGINE_USES_WA_HOLD_SWITCHOUT;
/*
* TODO: GuC supports timeslicing and semaphores as well, but they're
@ -4544,7 +4555,7 @@ static void guc_sched_engine_destroy(struct kref *kref)
int intel_guc_submission_setup(struct intel_engine_cs *engine)
{
struct drm_i915_private *i915 = engine->i915;
struct intel_guc *guc = &engine->gt->uc.guc;
struct intel_guc *guc = gt_to_guc(engine->gt);
/*
* The setup relies on several assumptions (e.g. irqs always enabled)
@ -5303,7 +5314,7 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
void intel_guc_find_hung_context(struct intel_engine_cs *engine)
{
struct intel_guc *guc = &engine->gt->uc.guc;
struct intel_guc *guc = gt_to_guc(engine->gt);
struct intel_context *ce;
struct i915_request *rq;
unsigned long index;
@ -5365,7 +5376,7 @@ void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
struct i915_request *hung_rq,
struct drm_printer *m)
{
struct intel_guc *guc = &engine->gt->uc.guc;
struct intel_guc *guc = gt_to_guc(engine->gt);
struct intel_context *ce;
unsigned long index;
unsigned long flags;
@ -5817,7 +5828,7 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
if (!ve)
return ERR_PTR(-ENOMEM);
guc = &siblings[0]->gt->uc.guc;
guc = gt_to_guc(siblings[0]->gt);
ve->base.i915 = siblings[0]->i915;
ve->base.gt = siblings[0]->gt;


@ -385,7 +385,7 @@ int intel_huc_init(struct intel_huc *huc)
if (HAS_ENGINE(gt, GSC0)) {
struct i915_vma *vma;
vma = intel_guc_allocate_vma(&gt->uc.guc, PXP43_HUC_AUTH_INOUT_SIZE * 2);
vma = intel_guc_allocate_vma(gt_to_guc(gt), PXP43_HUC_AUTH_INOUT_SIZE * 2);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
huc_info(huc, "Failed to allocate heci pkt\n");
@ -540,7 +540,7 @@ int intel_huc_wait_for_auth_complete(struct intel_huc *huc,
int intel_huc_auth(struct intel_huc *huc, enum intel_huc_authentication_type type)
{
struct intel_gt *gt = huc_to_gt(huc);
struct intel_guc *guc = &gt->uc.guc;
struct intel_guc *guc = gt_to_guc(gt);
int ret;
if (!intel_uc_fw_is_loaded(&huc->fw))


@ -807,7 +807,7 @@ static int try_firmware_load(struct intel_uc_fw *uc_fw, const struct firmware **
static int check_mtl_huc_guc_compatibility(struct intel_gt *gt,
struct intel_uc_fw_file *huc_selected)
{
struct intel_uc_fw_file *guc_selected = &gt->uc.guc.fw.file_selected;
struct intel_uc_fw_file *guc_selected = &gt_to_guc(gt)->fw.file_selected;
struct intel_uc_fw_ver *huc_ver = &huc_selected->ver;
struct intel_uc_fw_ver *guc_ver = &guc_selected->ver;
bool new_huc, new_guc;
@ -1209,7 +1209,7 @@ static int uc_fw_rsa_data_create(struct intel_uc_fw *uc_fw)
* since its GGTT offset will be GuC accessible.
*/
GEM_BUG_ON(uc_fw->rsa_size > PAGE_SIZE);
vma = intel_guc_allocate_vma(&gt->uc.guc, PAGE_SIZE);
vma = intel_guc_allocate_vma(gt_to_guc(gt), PAGE_SIZE);
if (IS_ERR(vma))
return PTR_ERR(vma);


@ -144,7 +144,7 @@ static int intel_guc_scrub_ctbs(void *arg)
static int intel_guc_steal_guc_ids(void *arg)
{
struct intel_gt *gt = arg;
struct intel_guc *guc = &gt->uc.guc;
struct intel_guc *guc = gt_to_guc(gt);
int ret, sv, context_index = 0;
intel_wakeref_t wakeref;
struct intel_engine_cs *engine;


@ -202,7 +202,7 @@ static void sanitize_gpu(struct drm_i915_private *i915)
unsigned int i;
for_each_gt(gt, i915, i)
__intel_gt_reset(gt, ALL_ENGINES);
intel_gt_reset_all_engines(gt);
}
}


@ -155,6 +155,12 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
*/
value = 1;
break;
case I915_PARAM_HAS_CONTEXT_FREQ_HINT:
if (intel_uc_uses_guc_submission(&to_gt(i915)->uc))
value = 1;
else
value = -EINVAL;
break;
case I915_PARAM_HAS_CONTEXT_ISOLATION:
value = intel_engines_has_context_isolation(i915);
break;


@ -1776,8 +1776,6 @@ static void release_references(struct i915_vma *vma, struct intel_gt *gt,
if (vm_ddestroy)
i915_vm_resv_put(vma->vm);
/* Wait for async active retire */
i915_active_wait(&vma->active);
i915_active_fini(&vma->active);
GEM_WARN_ON(vma->resource);
i915_vma_free(vma);


@ -154,6 +154,30 @@ __wait_gsc_proxy_completed(struct drm_i915_private *i915)
pr_warn(DRIVER_NAME "Timed out waiting for gsc_proxy_completion!\n");
}
static void
__wait_gsc_huc_load_completed(struct drm_i915_private *i915)
{
/* this only applies to DG2, so we only care about GT0 */
struct intel_huc *huc = &to_gt(i915)->uc.huc;
bool need_to_wait = (IS_ENABLED(CONFIG_INTEL_MEI_PXP) &&
intel_huc_wait_required(huc));
/*
* The GSC and PXP mei bringup depends on the kernel boot ordering, so
* to account for the worst case scenario the HuC code waits for up to
* 10s for the GSC driver to load and then another 5s for the PXP
* component to bind before giving up, even though those steps normally
* complete in less than a second from the i915 load. We match that
* timeout here, but we expect to bail early due to the fence being
* signalled even in a failure case, as it is extremely unlikely that
* both components will use their full timeout.
*/
unsigned long timeout_ms = 15000;
if (need_to_wait &&
wait_for(i915_sw_fence_done(&huc->delayed_load.fence), timeout_ms))
pr_warn(DRIVER_NAME "Timed out waiting for huc load via GSC!\n");
}
static int __run_selftests(const char *name,
struct selftest *st,
unsigned int count,
@ -228,14 +252,16 @@ int i915_mock_selftests(void)
int i915_live_selftests(struct pci_dev *pdev)
{
struct drm_i915_private *i915 = pdev_to_i915(pdev);
int err;
if (!i915_selftest.live)
return 0;
__wait_gsc_proxy_completed(pdev_to_i915(pdev));
__wait_gsc_proxy_completed(i915);
__wait_gsc_huc_load_completed(i915);
err = run_selftests(live, pdev_to_i915(pdev));
err = run_selftests(live, i915);
if (err) {
i915_selftest.live = err;
return err;
@ -251,14 +277,16 @@ int i915_live_selftests(struct pci_dev *pdev)
int i915_perf_selftests(struct pci_dev *pdev)
{
struct drm_i915_private *i915 = pdev_to_i915(pdev);
int err;
if (!i915_selftest.perf)
return 0;
__wait_gsc_proxy_completed(pdev_to_i915(pdev));
__wait_gsc_proxy_completed(i915);
__wait_gsc_huc_load_completed(i915);
err = run_selftests(perf, pdev_to_i915(pdev));
err = run_selftests(perf, i915);
if (err) {
i915_selftest.perf = err;
return err;


@ -806,6 +806,12 @@ typedef struct drm_i915_irq_wait {
*/
#define I915_PARAM_PXP_STATUS 58
/*
* Query if kernel allows marking a context to send a Freq hint to SLPC. This
* will enable use of the strategies allowed by the SLPC algorithm.
*/
#define I915_PARAM_HAS_CONTEXT_FREQ_HINT 59
/* Must be kept compact -- no holes and well documented */
/**
@ -2148,6 +2154,15 @@ struct drm_i915_gem_context_param {
* -EIO: The firmware did not succeed in creating the protected context.
*/
#define I915_CONTEXT_PARAM_PROTECTED_CONTENT 0xd
/*
* I915_CONTEXT_PARAM_LOW_LATENCY:
*
* Mark this context as a low latency workload which requires aggressive GT
* frequency scaling. Use I915_PARAM_HAS_CONTEXT_FREQ_HINT to check if the kernel
* supports this per context flag.
*/
#define I915_CONTEXT_PARAM_LOW_LATENCY 0xe
/* Must be kept compact -- no holes and well documented */
/** @value: Context parameter value to be set or queried */