Merge tag 'drm-next-2021-07-08-1' of git://anongit.freedesktop.org/drm/drm

Pull drm fixes from Dave Airlie:
 "Some fixes for rc1 that came in the past weeks, mainly a bunch of
  amdgpu fixes, some i915 and the rest are misc around the place. I'm
  sending this a bit early so some more stuff may show up, but I'll
  probably take tomorrow off.

  dma-buf:
   - doc fixes

  amdgpu:
   - Misc Navi fixes
   - Powergating fix
   - Yellow Carp updates
   - Beige Goby updates
   - S0ix fix
   - Revert overlay validation fix
   - GPU reset fix for DC
   - PPC64 fix
   - Add new dimgrey cavefish DID
   - RAS fix
   - TTM fixes

  amdkfd:
   - SVM fixes

  radeon:
   - Fix missing drm_gem_object_put in error path
   - NULL ptr deref fix

  i915:
   - display DP VSC fix
   - DG1 display fix
   - IRQ fixes
   - IRQ demidlayering

  gma500:
   - bo leaks in error paths fixed"

* tag 'drm-next-2021-07-08-1' of git://anongit.freedesktop.org/drm/drm: (52 commits)
  drm/i915: Drop all references to DRM IRQ midlayer
  drm/i915: Use the correct IRQ during resume
  drm/i915/display/dg1: Correctly map DPLLs during state readout
  drm/i915/display: Do not zero past infoframes.vsc
  drm/amdgpu: Conditionally reset SDMA RAS error counts
  drm/amdkfd: Maintain svm_bo reference in page->zone_device_data
  drm/amdkfd: add invalid pages debug at vram migration
  drm/amdkfd: skip migration for pages already in VRAM
  drm/amdkfd: skip invalid pages during migrations
  drm/amdkfd: classify and map mixed svm range pages in GPU
  drm/amdkfd: use hmm range fault to get both domain pfns
  drm/amdgpu: get owner ref in validate and map
  drm/amdkfd: set owner ref to svm range prefault
  drm/amdkfd: add owner ref param to get hmm pages
  drm/amdkfd: device pgmap owner at the svm migrate init
  drm/amdkfd: inc counter on child ranges with xnack off
  drm/amd/display: Extend DMUB diagnostic logging to DCN3.1
  drm/amdgpu: Update NV SIMD-per-CU to 2
  drm/amdgpu: add new dimgrey cavefish DID
  drm/amd/pm: skip PrepareMp1ForUnload message in s0ix
  ...
commit f55966571d
Author: Linus Torvalds
Date:   2021-07-08 12:28:15 -07:00

53 changed files with 1049 additions and 505 deletions


@ -1369,6 +1369,38 @@ static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
adev->pm.smu_prv_buffer_size = 0;
}
static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
{
if (!(adev->flags & AMD_IS_APU) ||
adev->asic_type < CHIP_RAVEN)
return 0;
switch (adev->asic_type) {
case CHIP_RAVEN:
if (adev->pdev->device == 0x15dd)
adev->apu_flags |= AMD_APU_IS_RAVEN;
if (adev->pdev->device == 0x15d8)
adev->apu_flags |= AMD_APU_IS_PICASSO;
break;
case CHIP_RENOIR:
if ((adev->pdev->device == 0x1636) ||
(adev->pdev->device == 0x164c))
adev->apu_flags |= AMD_APU_IS_RENOIR;
else
adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
break;
case CHIP_VANGOGH:
adev->apu_flags |= AMD_APU_IS_VANGOGH;
break;
case CHIP_YELLOW_CARP:
break;
default:
return -EINVAL;
}
return 0;
}
/**
* amdgpu_device_check_arguments - validate module params
*
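
The new helper centralizes APU identification: PCI device IDs are inspected once at init, and later code (soc15.c in this same series) only tests adev->apu_flags bits. A hedged, self-contained sketch of that consumer pattern — the flag values below are assumptions for illustration; the kernel's real ones live in amd_shared.h:

    /* Sketch, not driver code: derive identity once, test flags elsewhere.
     * Bit values are illustrative; the kernel defines them in amd_shared.h. */
    #include <stdint.h>
    #include <stdio.h>

    #define AMD_APU_IS_RENOIR        (1u << 3)   /* assumed value */
    #define AMD_APU_IS_GREEN_SARDINE (1u << 4)   /* assumed value */

    /* Mirrors the CHIP_RENOIR branch above: two device IDs are Renoir
     * proper, every other member of the family is Green Sardine. */
    static uint32_t renoir_apu_flags(uint16_t did)
    {
        if (did == 0x1636 || did == 0x164c)
            return AMD_APU_IS_RENOIR;
        return AMD_APU_IS_GREEN_SARDINE;
    }

    int main(void)
    {
        uint32_t flags = renoir_apu_flags(0x1636);

        /* Consumers now test the flag instead of re-checking the DID: */
        printf("renoir? %d\n", !!(flags & AMD_APU_IS_RENOIR));
        return 0;
    }
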
@ -3386,6 +3418,10 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->psp.mutex);
mutex_init(&adev->notifier_lock);
r = amdgpu_device_init_apu_flags(adev);
if (r)
return r;
r = amdgpu_device_check_arguments(adev);
if (r)
return r;
@ -4304,6 +4340,7 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
case CHIP_DIMGREY_CAVEFISH:
case CHIP_BEIGE_GOBY:
case CHIP_VANGOGH:
case CHIP_ALDEBARAN:
break;


@ -160,6 +160,7 @@ int amdgpu_smu_pptable_id = -1;
* highest. That helps saving some idle power.
* DISABLE_FRACTIONAL_PWM (bit 2) disabled by default
* PSR (bit 3) disabled by default
* EDP NO POWER SEQUENCING (bit 4) disabled by default
*/
uint amdgpu_dc_feature_mask = 2;
uint amdgpu_dc_debug_mask;
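
As a quick illustration of how the documented bits compose — a sketch decoding only the bits named in the comment above (the default value 2 sets a bit documented earlier in that comment block, outside this hunk):

    /* Sketch: decode the amdgpu_dc_feature_mask bits documented above. */
    #include <stdio.h>

    int main(void)
    {
        unsigned int mask = 2;  /* the driver default shown above */

        printf("DISABLE_FRACTIONAL_PWM  (bit 2): %s\n",
               (mask & (1u << 2)) ? "on" : "off");
        printf("PSR                     (bit 3): %s\n",
               (mask & (1u << 3)) ? "on" : "off");
        printf("EDP NO POWER SEQUENCING (bit 4): %s\n",
               (mask & (1u << 4)) ? "on" : "off");
        return 0;
    }

Opting in to the new bit 4 behaviour would mean loading amdgpu with a dc_feature_mask value that includes 0x10.
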
@ -1198,6 +1199,7 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x73E0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
{0x1002, 0x73E1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
{0x1002, 0x73E2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
{0x1002, 0x73E3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
{0x1002, 0x73FF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
/* Aldebaran */


@ -562,6 +562,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
case CHIP_NAVI14:
case CHIP_NAVI12:
case CHIP_VANGOGH:
case CHIP_YELLOW_CARP:
/* Don't enable it by default yet.
*/
if (amdgpu_tmz < 1) {


@ -160,7 +160,7 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
struct mm_struct *mm, struct page **pages,
uint64_t start, uint64_t npages,
struct hmm_range **phmm_range, bool readonly,
bool mmap_locked)
bool mmap_locked, void *owner)
{
struct hmm_range *hmm_range;
unsigned long timeout;
@ -185,6 +185,7 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
hmm_range->hmm_pfns = pfns;
hmm_range->start = start;
hmm_range->end = start + npages * PAGE_SIZE;
hmm_range->dev_private_owner = owner;
/* Assuming 512MB takes maximum 1 second to fault page address */
timeout = max(npages >> 17, 1ULL) * HMM_RANGE_DEFAULT_TIMEOUT;


@ -34,7 +34,7 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
struct mm_struct *mm, struct page **pages,
uint64_t start, uint64_t npages,
struct hmm_range **phmm_range, bool readonly,
bool mmap_locked);
bool mmap_locked, void *owner);
int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range);
#if defined(CONFIG_HMM_MIRROR)


@ -93,6 +93,8 @@ struct amdgpu_nbio_funcs {
void (*enable_aspm)(struct amdgpu_device *adev,
bool enable);
void (*program_aspm)(struct amdgpu_device *adev);
void (*apply_lc_spc_mode_wa)(struct amdgpu_device *adev);
void (*apply_l1_link_width_reconfig_wa)(struct amdgpu_device *adev);
};
struct amdgpu_nbio {


@ -590,10 +590,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
mem->bus.offset += adev->gmc.aper_base;
mem->bus.is_iomem = true;
if (adev->gmc.xgmi.connected_to_cpu)
mem->bus.caching = ttm_cached;
else
mem->bus.caching = ttm_write_combined;
break;
default:
return -EINVAL;
@ -695,7 +691,7 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
readonly = amdgpu_ttm_tt_is_readonly(ttm);
r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start,
ttm->num_pages, &gtt->range, readonly,
true);
true, NULL);
out_unlock:
mmap_read_unlock(mm);
mmput(mm);
@ -923,7 +919,8 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
bo_mem->mem_type == AMDGPU_PL_OA)
return -EINVAL;
if (!amdgpu_gtt_mgr_has_gart_addr(bo_mem)) {
if (bo_mem->mem_type != TTM_PL_TT ||
!amdgpu_gtt_mgr_has_gart_addr(bo_mem)) {
gtt->offset = AMDGPU_BO_INVALID_OFFSET;
return 0;
}


@ -463,6 +463,11 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
if (i == 1)
node->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
if (adev->gmc.xgmi.connected_to_cpu)
node->base.bus.caching = ttm_cached;
else
node->base.bus.caching = ttm_write_combined;
atomic64_add(vis_usage, &mgr->vis_usage);
*res = &node->base;
return 0;


@ -36,9 +36,12 @@ athub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
{
uint32_t def, data;
if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
return;
def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
if (enable)
data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
else
data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
@ -53,10 +56,13 @@ athub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
{
uint32_t def, data;
if (!((adev->cg_flags & AMD_CG_SUPPORT_MC_LS) &&
(adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)))
return;
def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) &&
(adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
if (enable)
data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
else
data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
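
Both athub hunks follow the driver's standard clock-gating idiom: bail out early when the feature isn't advertised in cg_flags, read the register once, flip the bit, and write back only on change. A self-contained sketch of that idiom — the stubs merely stand in for RREG32_SOC15/WREG32_SOC15 and are not the real accessors:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define CG_SUPPORT_MC_MGCG (1u << 0)   /* illustrative flag bit */
    #define CG_ENABLE_MASK     (1u << 5)   /* illustrative register bit */

    static uint32_t fake_reg;                     /* stands in for the MMIO register */
    static uint32_t reg_read(void) { return fake_reg; }
    static void reg_write(uint32_t v) { fake_reg = v; printf("write 0x%x\n", v); }

    static void update_mgcg(uint32_t cg_flags, bool enable)
    {
        uint32_t def, data;

        if (!(cg_flags & CG_SUPPORT_MC_MGCG))  /* early-out, as in the hunk */
            return;

        def = data = reg_read();
        if (enable)
            data |= CG_ENABLE_MASK;
        else
            data &= ~CG_ENABLE_MASK;

        if (def != data)        /* skip the MMIO write when nothing changed */
            reg_write(data);
    }

    int main(void)
    {
        update_mgcg(CG_SUPPORT_MC_MGCG, true);   /* writes once */
        update_mgcg(CG_SUPPORT_MC_MGCG, true);   /* no-op: value unchanged */
        return 0;
    }
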


@ -5086,47 +5086,44 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
4 + /* RMI */
1); /* SQG */
if (adev->asic_type == CHIP_NAVI10 ||
adev->asic_type == CHIP_NAVI14 ||
adev->asic_type == CHIP_NAVI12) {
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
/*
* Set corresponding TCP bits for the inactive WGPs in
* GCRD_SA_TARGETS_DISABLE
*/
gcrd_targets_disable_tcp = 0;
/* Set TCP & SQC bits in UTCL1_UTCL0_INVREQ_DISABLE */
utcl_invreq_disable = 0;
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
/*
* Set corresponding TCP bits for the inactive WGPs in
* GCRD_SA_TARGETS_DISABLE
*/
gcrd_targets_disable_tcp = 0;
/* Set TCP & SQC bits in UTCL1_UTCL0_INVREQ_DISABLE */
utcl_invreq_disable = 0;
for (k = 0; k < max_wgp_per_sh; k++) {
if (!(wgp_active_bitmap & (1 << k))) {
gcrd_targets_disable_tcp |= 3 << (2 * k);
utcl_invreq_disable |= (3 << (2 * k)) |
(3 << (2 * (max_wgp_per_sh + k)));
}
for (k = 0; k < max_wgp_per_sh; k++) {
if (!(wgp_active_bitmap & (1 << k))) {
gcrd_targets_disable_tcp |= 3 << (2 * k);
gcrd_targets_disable_tcp |= 1 << (k + (max_wgp_per_sh * 2));
utcl_invreq_disable |= (3 << (2 * k)) |
(3 << (2 * (max_wgp_per_sh + k)));
}
tmp = RREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE);
/* only override TCP & SQC bits */
tmp &= 0xffffffff << (4 * max_wgp_per_sh);
tmp |= (utcl_invreq_disable & utcl_invreq_disable_mask);
WREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE, tmp);
tmp = RREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE);
/* only override TCP bits */
tmp &= 0xffffffff << (2 * max_wgp_per_sh);
tmp |= (gcrd_targets_disable_tcp & gcrd_targets_disable_mask);
WREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE, tmp);
}
}
gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex);
tmp = RREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE);
/* only override TCP & SQC bits */
tmp &= (0xffffffffU << (4 * max_wgp_per_sh));
tmp |= (utcl_invreq_disable & utcl_invreq_disable_mask);
WREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE, tmp);
tmp = RREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE);
/* only override TCP & SQC bits */
tmp &= (0xffffffffU << (3 * max_wgp_per_sh));
tmp |= (gcrd_targets_disable_tcp & gcrd_targets_disable_mask);
WREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE, tmp);
}
}
gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex);
}
static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev)
@ -7404,7 +7401,10 @@ static int gfx_v10_0_hw_init(void *handle)
* init golden registers and rlc resume may override some registers,
* reconfig them here
*/
gfx_v10_0_tcp_harvest(adev);
if (adev->asic_type == CHIP_NAVI10 ||
adev->asic_type == CHIP_NAVI14 ||
adev->asic_type == CHIP_NAVI12)
gfx_v10_0_tcp_harvest(adev);
r = gfx_v10_0_cp_resume(adev);
if (r)
@ -7777,6 +7777,9 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade
{
uint32_t data, def;
if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
return;
/* It is disabled by HW by default */
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
/* 0 - Disable some blocks' MGCG */
@ -7791,6 +7794,7 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade
RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK |
RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK |
RLC_CGTT_MGCG_OVERRIDE__ENABLE_CGTS_LEGACY_MASK);
if (def != data)
@ -7813,13 +7817,15 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade
WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
}
}
} else {
} else if (!enable || !(adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
/* 1 - MGCG_OVERRIDE */
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK |
RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK |
RLC_CGTT_MGCG_OVERRIDE__ENABLE_CGTS_LEGACY_MASK);
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
@ -7845,22 +7851,34 @@ static void gfx_v10_0_update_3d_clock_gating(struct amdgpu_device *adev,
{
uint32_t data, def;
if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)))
return;
/* Enable 3D CGCG/CGLS */
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
if (enable) {
/* write cmd to clear cgcg/cgls ov */
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
/* unset CGCG override */
data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
/* update CGCG and CGLS override bits */
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
/* enable 3Dcgcg FSM(0x0000363f) */
def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
data = 0;
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
@ -7873,9 +7891,14 @@ static void gfx_v10_0_update_3d_clock_gating(struct amdgpu_device *adev,
} else {
/* Disable CGCG/CGLS */
def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
/* disable cgcg, cgls should be disabled */
data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
/* disable cgcg and cgls in FSM */
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
@ -7887,25 +7910,35 @@ static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade
{
uint32_t def, data;
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)))
return;
if (enable) {
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
/* unset CGCG override */
data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
else
data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
/* update CGCG and CGLS override bits */
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
/* enable cgcg FSM(0x0000363F) */
def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
data = 0;
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
@ -7917,8 +7950,14 @@ static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade
WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
} else {
def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
/* reset CGCG/CGLS bits */
data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
/* disable cgcg and cgls in FSM */
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
@ -7930,7 +7969,10 @@ static void gfx_v10_0_update_fine_grain_clock_gating(struct amdgpu_device *adev,
{
uint32_t def, data;
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) {
if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
return;
if (enable) {
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
/* unset FGCG override */
data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
@ -7961,6 +8003,97 @@ static void gfx_v10_0_update_fine_grain_clock_gating(struct amdgpu_device *adev,
}
}
static void gfx_v10_0_apply_medium_grain_clock_gating_workaround(struct amdgpu_device *adev)
{
uint32_t reg_data = 0;
uint32_t reg_idx = 0;
uint32_t i;
const uint32_t tcp_ctrl_regs[] = {
mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG,
mmCGTS_SA0_WGP00_CU1_TCP_CTRL_REG,
mmCGTS_SA0_WGP01_CU0_TCP_CTRL_REG,
mmCGTS_SA0_WGP01_CU1_TCP_CTRL_REG,
mmCGTS_SA0_WGP02_CU0_TCP_CTRL_REG,
mmCGTS_SA0_WGP02_CU1_TCP_CTRL_REG,
mmCGTS_SA0_WGP10_CU0_TCP_CTRL_REG,
mmCGTS_SA0_WGP10_CU1_TCP_CTRL_REG,
mmCGTS_SA0_WGP11_CU0_TCP_CTRL_REG,
mmCGTS_SA0_WGP11_CU1_TCP_CTRL_REG,
mmCGTS_SA0_WGP12_CU0_TCP_CTRL_REG,
mmCGTS_SA0_WGP12_CU1_TCP_CTRL_REG,
mmCGTS_SA1_WGP00_CU0_TCP_CTRL_REG,
mmCGTS_SA1_WGP00_CU1_TCP_CTRL_REG,
mmCGTS_SA1_WGP01_CU0_TCP_CTRL_REG,
mmCGTS_SA1_WGP01_CU1_TCP_CTRL_REG,
mmCGTS_SA1_WGP02_CU0_TCP_CTRL_REG,
mmCGTS_SA1_WGP02_CU1_TCP_CTRL_REG,
mmCGTS_SA1_WGP10_CU0_TCP_CTRL_REG,
mmCGTS_SA1_WGP10_CU1_TCP_CTRL_REG,
mmCGTS_SA1_WGP11_CU0_TCP_CTRL_REG,
mmCGTS_SA1_WGP11_CU1_TCP_CTRL_REG,
mmCGTS_SA1_WGP12_CU0_TCP_CTRL_REG,
mmCGTS_SA1_WGP12_CU1_TCP_CTRL_REG
};
const uint32_t tcp_ctrl_regs_nv12[] = {
mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG,
mmCGTS_SA0_WGP00_CU1_TCP_CTRL_REG,
mmCGTS_SA0_WGP01_CU0_TCP_CTRL_REG,
mmCGTS_SA0_WGP01_CU1_TCP_CTRL_REG,
mmCGTS_SA0_WGP02_CU0_TCP_CTRL_REG,
mmCGTS_SA0_WGP02_CU1_TCP_CTRL_REG,
mmCGTS_SA0_WGP10_CU0_TCP_CTRL_REG,
mmCGTS_SA0_WGP10_CU1_TCP_CTRL_REG,
mmCGTS_SA0_WGP11_CU0_TCP_CTRL_REG,
mmCGTS_SA0_WGP11_CU1_TCP_CTRL_REG,
mmCGTS_SA1_WGP00_CU0_TCP_CTRL_REG,
mmCGTS_SA1_WGP00_CU1_TCP_CTRL_REG,
mmCGTS_SA1_WGP01_CU0_TCP_CTRL_REG,
mmCGTS_SA1_WGP01_CU1_TCP_CTRL_REG,
mmCGTS_SA1_WGP02_CU0_TCP_CTRL_REG,
mmCGTS_SA1_WGP02_CU1_TCP_CTRL_REG,
mmCGTS_SA1_WGP10_CU0_TCP_CTRL_REG,
mmCGTS_SA1_WGP10_CU1_TCP_CTRL_REG,
mmCGTS_SA1_WGP11_CU0_TCP_CTRL_REG,
mmCGTS_SA1_WGP11_CU1_TCP_CTRL_REG,
};
const uint32_t sm_ctlr_regs[] = {
mmCGTS_SA0_QUAD0_SM_CTRL_REG,
mmCGTS_SA0_QUAD1_SM_CTRL_REG,
mmCGTS_SA1_QUAD0_SM_CTRL_REG,
mmCGTS_SA1_QUAD1_SM_CTRL_REG
};
if (adev->asic_type == CHIP_NAVI12) {
for (i = 0; i < ARRAY_SIZE(tcp_ctrl_regs_nv12); i++) {
reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG_BASE_IDX] +
tcp_ctrl_regs_nv12[i];
reg_data = RREG32(reg_idx);
reg_data |= CGTS_SA0_WGP00_CU0_TCP_CTRL_REG__TCPI_LS_OVERRIDE_MASK;
WREG32(reg_idx, reg_data);
}
} else {
for (i = 0; i < ARRAY_SIZE(tcp_ctrl_regs); i++) {
reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG_BASE_IDX] +
tcp_ctrl_regs[i];
reg_data = RREG32(reg_idx);
reg_data |= CGTS_SA0_WGP00_CU0_TCP_CTRL_REG__TCPI_LS_OVERRIDE_MASK;
WREG32(reg_idx, reg_data);
}
}
for (i = 0; i < ARRAY_SIZE(sm_ctlr_regs); i++) {
reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_QUAD0_SM_CTRL_REG_BASE_IDX] +
sm_ctlr_regs[i];
reg_data = RREG32(reg_idx);
reg_data &= ~CGTS_SA0_QUAD0_SM_CTRL_REG__SM_MODE_MASK;
reg_data |= 2 << CGTS_SA0_QUAD0_SM_CTRL_REG__SM_MODE__SHIFT;
WREG32(reg_idx, reg_data);
}
}
static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
bool enable)
{
@ -7977,6 +8110,10 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
gfx_v10_0_update_3d_clock_gating(adev, enable);
/* === CGCG + CGLS === */
gfx_v10_0_update_coarse_grain_clock_gating(adev, enable);
if ((adev->asic_type >= CHIP_NAVI10) &&
(adev->asic_type <= CHIP_NAVI12))
gfx_v10_0_apply_medium_grain_clock_gating_workaround(adev);
} else {
/* CGCG/CGLS should be disabled before MGCG/MGLS
* === CGCG + CGLS ===
@ -9324,17 +9461,22 @@ static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *
static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
{
u32 data, wgp_bitmask;
data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
u32 disabled_mask =
~amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
u32 efuse_setting = 0;
u32 vbios_setting = 0;
data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
efuse_setting = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
efuse_setting &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
efuse_setting >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
wgp_bitmask =
amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
vbios_setting = RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
vbios_setting &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
vbios_setting >>= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
return (~data) & wgp_bitmask;
disabled_mask |= efuse_setting | vbios_setting;
return (~disabled_mask);
}
static u32 gfx_v10_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
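
The rework above makes the two harvest sources explicit — efuse (CC_GC_SHADER_ARRAY_CONFIG) and vbios (GC_USER_SHADER_ARRAY_CONFIG) — and also pre-marks WGP bits beyond max_cu_per_sh/2 as disabled. A standalone sketch of just the arithmetic, with the register decoding abstracted into parameters:

    #include <stdint.h>
    #include <stdio.h>

    /* Bitmask with the low n bits set, like amdgpu_gfx_create_bitmask(). */
    static uint32_t create_bitmask(unsigned int n)
    {
        return (n >= 32) ? 0xffffffffu : ((1u << n) - 1u);
    }

    static uint32_t wgp_active_bitmap(unsigned int max_cu_per_sh,
                                      uint32_t efuse_inactive,
                                      uint32_t vbios_inactive)
    {
        unsigned int max_wgp = max_cu_per_sh >> 1;   /* 2 CUs per WGP */
        /* Out-of-range WGP bits count as disabled from the start. */
        uint32_t disabled = ~create_bitmask(max_wgp);

        disabled |= efuse_inactive | vbios_inactive;
        return ~disabled;
    }

    int main(void)
    {
        /* 10 CUs/SA -> 5 WGPs; efuse kills WGP0, vbios kills WGP4. */
        printf("active: 0x%x\n", wgp_active_bitmap(10, 0x1, 0x10));
        return 0;
    }
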


@ -90,45 +90,56 @@ static void hdp_v5_0_update_mem_power_gating(struct amdgpu_device *adev,
RC_MEM_POWER_SD_EN, 0);
WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
/* only one clock gating mode (LS/DS/SD) can be enabled */
if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) {
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
HDP_MEM_POWER_CTRL,
IPH_MEM_POWER_LS_EN, enable);
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
HDP_MEM_POWER_CTRL,
RC_MEM_POWER_LS_EN, enable);
} else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) {
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
HDP_MEM_POWER_CTRL,
IPH_MEM_POWER_DS_EN, enable);
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
HDP_MEM_POWER_CTRL,
RC_MEM_POWER_DS_EN, enable);
} else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) {
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
HDP_MEM_POWER_CTRL,
IPH_MEM_POWER_SD_EN, enable);
/* RC should not use shut down mode, fallback to ds */
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
HDP_MEM_POWER_CTRL,
RC_MEM_POWER_DS_EN, enable);
/* Already disabled above. The actions below are for "enabled" only */
if (enable) {
/* only one clock gating mode (LS/DS/SD) can be enabled */
if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) {
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
HDP_MEM_POWER_CTRL,
IPH_MEM_POWER_LS_EN, 1);
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
HDP_MEM_POWER_CTRL,
RC_MEM_POWER_LS_EN, 1);
} else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) {
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
HDP_MEM_POWER_CTRL,
IPH_MEM_POWER_DS_EN, 1);
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
HDP_MEM_POWER_CTRL,
RC_MEM_POWER_DS_EN, 1);
} else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) {
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
HDP_MEM_POWER_CTRL,
IPH_MEM_POWER_SD_EN, 1);
/* RC should not use shut down mode, fallback to ds or ls if allowed */
if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS)
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
HDP_MEM_POWER_CTRL,
RC_MEM_POWER_DS_EN, 1);
else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
HDP_MEM_POWER_CTRL,
RC_MEM_POWER_LS_EN, 1);
}
/* confirmed that IPH_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to
* be set for SRAM LS/DS/SD */
if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS |
AMD_CG_SUPPORT_HDP_SD)) {
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
IPH_MEM_POWER_CTRL_EN, 1);
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
RC_MEM_POWER_CTRL_EN, 1);
WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
}
}
/* confirmed that IPH_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to
* be set for SRAM LS/DS/SD */
if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS |
AMD_CG_SUPPORT_HDP_SD)) {
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
IPH_MEM_POWER_CTRL_EN, 1);
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
RC_MEM_POWER_CTRL_EN, 1);
}
WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
/* restore IPH & RC clock override after clock/power mode changing */
WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl1);
/* disable IPH & RC clock override after clock/power mode changing */
hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
IPH_MEM_CLK_SOFT_OVERRIDE, 0);
hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
RC_MEM_CLK_SOFT_OVERRIDE, 0);
WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl);
}
static void hdp_v5_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
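
The restructured function makes the enable path explicit: exactly one of LS/DS/SD is picked from cg_flags in priority order, and because the RC memory must not use shutdown mode, the SD case tries to fall back to DS or LS for RC. A compact sketch of that selection logic (as in the hunk, the inner fallback only matters when SD is the sole allowed mode, in which case RC simply stays ungated):

    #include <stdio.h>

    #define HDP_LS (1u << 0)   /* illustrative cg_flags bits */
    #define HDP_DS (1u << 1)
    #define HDP_SD (1u << 2)

    static const char *pick_rc_mode(unsigned int cg_flags)
    {
        /* Priority order mirrors the hunk: LS, then DS, then SD. */
        if (cg_flags & HDP_LS)
            return "LS";
        if (cg_flags & HDP_DS)
            return "DS";
        if (cg_flags & HDP_SD) {
            /* RC must not shut down: fall back to DS, then LS. */
            if (cg_flags & HDP_DS)
                return "DS";
            if (cg_flags & HDP_LS)
                return "LS";
            return "none";      /* IPH still gets SD; RC stays ungated */
        }
        return "none";
    }

    int main(void)
    {
        printf("SD only -> RC mode %s\n", pick_rc_mode(HDP_SD));
        printf("SD + DS -> RC mode %s\n", pick_rc_mode(HDP_SD | HDP_DS));
        return 0;
    }
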


@ -568,6 +568,9 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
{
uint32_t def, data, def1, data1;
if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
return;
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
@ -582,7 +585,7 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
break;
}
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) {
if (enable) {
data |= MM_ATC_L2_MISC_CG__ENABLE_MASK;
data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
@ -627,6 +630,9 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade
{
uint32_t def, data;
if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_LS))
return;
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
@ -639,7 +645,7 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade
break;
}
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS))
if (enable)
data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
else
data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;


@ -51,6 +51,8 @@
#define mmBIF_MMSCH1_DOORBELL_RANGE 0x01d8
#define mmBIF_MMSCH1_DOORBELL_RANGE_BASE_IDX 2
#define smnPCIE_LC_LINK_WIDTH_CNTL 0x11140288
static void nbio_v2_3_remap_hdp_registers(struct amdgpu_device *adev)
{
WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
@ -218,8 +220,11 @@ static void nbio_v2_3_update_medium_grain_clock_gating(struct amdgpu_device *ade
{
uint32_t def, data;
if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
return;
def = data = RREG32_PCIE(smnCPM_CONTROL);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) {
if (enable) {
data |= (CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
@ -244,8 +249,11 @@ static void nbio_v2_3_update_medium_grain_light_sleep(struct amdgpu_device *adev
{
uint32_t def, data;
if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
return;
def = data = RREG32_PCIE(smnPCIE_CNTL2);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) {
if (enable) {
data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
PCIE_CNTL2__MST_MEM_LS_EN_MASK |
PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK);
@ -463,6 +471,43 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev)
WREG32_PCIE(smnPCIE_LC_CNTL3, data);
}
static void nbio_v2_3_apply_lc_spc_mode_wa(struct amdgpu_device *adev)
{
uint32_t reg_data = 0;
uint32_t link_width = 0;
if (!((adev->asic_type >= CHIP_NAVI10) &&
(adev->asic_type <= CHIP_NAVI12)))
return;
reg_data = RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL);
link_width = (reg_data & PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK)
>> PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT;
/*
* Program PCIE_LC_CNTL6.LC_SPC_MODE_8GT to 0x2 (4 symbols per clock data)
* if link_width is 0x3 (x4)
*/
if (0x3 == link_width) {
reg_data = RREG32_PCIE(smnPCIE_LC_CNTL6);
reg_data &= ~PCIE_LC_CNTL6__LC_SPC_MODE_8GT_MASK;
reg_data |= (0x2 << PCIE_LC_CNTL6__LC_SPC_MODE_8GT__SHIFT);
WREG32_PCIE(smnPCIE_LC_CNTL6, reg_data);
}
}
static void nbio_v2_3_apply_l1_link_width_reconfig_wa(struct amdgpu_device *adev)
{
uint32_t reg_data = 0;
if (adev->asic_type != CHIP_NAVI10)
return;
reg_data = RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL);
reg_data |= PCIE_LC_LINK_WIDTH_CNTL__LC_L1_RECONFIG_EN_MASK;
WREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL, reg_data);
}
const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
.get_hdp_flush_req_offset = nbio_v2_3_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v2_3_get_hdp_flush_done_offset,
@ -484,4 +529,6 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
.remap_hdp_registers = nbio_v2_3_remap_hdp_registers,
.enable_aspm = nbio_v2_3_enable_aspm,
.program_aspm = nbio_v2_3_program_aspm,
.apply_lc_spc_mode_wa = nbio_v2_3_apply_lc_spc_mode_wa,
.apply_l1_link_width_reconfig_wa = nbio_v2_3_apply_l1_link_width_reconfig_wa,
};


@ -64,6 +64,13 @@
#include "smuio_v11_0.h"
#include "smuio_v11_0_6.h"
#define codec_info_build(type, width, height, level) \
.codec_type = type,\
.max_width = width,\
.max_height = height,\
.max_pixels_per_frame = height * width,\
.max_level = level,
static const struct amd_ip_funcs nv_common_ip_funcs;
/* Navi */
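
codec_info_build exists so each codec table entry stays a one-liner: it expands to the designated initializers of a codec-info struct, deriving max_pixels_per_frame from width*height. A minimal stand-alone expansion of the same pattern, with the struct trimmed to the fields the macro fills and the arithmetic parenthesized here for safety:

    #include <stdint.h>
    #include <stdio.h>

    struct codec_info {
        uint32_t codec_type;
        uint32_t max_width;
        uint32_t max_height;
        uint32_t max_pixels_per_frame;
        uint32_t max_level;
    };

    #define codec_info_build(type, width, height, level) \
        .codec_type = type,                              \
        .max_width = width,                              \
        .max_height = height,                            \
        .max_pixels_per_frame = (height) * (width),      \
        .max_level = level,

    static const struct codec_info decode_array[] = {
        {codec_info_build(1 /* illustrative codec id */, 8192, 4352, 186)},
    };

    int main(void)
    {
        printf("max pixels/frame: %u\n", decode_array[0].max_pixels_per_frame);
        return 0;
    }
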
@ -309,6 +316,23 @@ static struct amdgpu_video_codecs sriov_sc_video_codecs_decode =
.codec_array = sriov_sc_video_codecs_decode_array,
};
/* Beige Goby*/
static const struct amdgpu_video_codec_info bg_video_codecs_decode_array[] = {
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
};
static const struct amdgpu_video_codecs bg_video_codecs_decode = {
.codec_count = ARRAY_SIZE(bg_video_codecs_decode_array),
.codec_array = bg_video_codecs_decode_array,
};
static const struct amdgpu_video_codecs bg_video_codecs_encode = {
.codec_count = 0,
.codec_array = NULL,
};
static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode,
const struct amdgpu_video_codecs **codecs)
{
@ -335,6 +359,12 @@ static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode,
else
*codecs = &sc_video_codecs_decode;
return 0;
case CHIP_BEIGE_GOBY:
if (encode)
*codecs = &bg_video_codecs_encode;
else
*codecs = &bg_video_codecs_decode;
return 0;
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
@ -1275,7 +1305,6 @@ static int nv_common_early_init(void *handle)
break;
case CHIP_VANGOGH:
adev->apu_flags |= AMD_APU_IS_VANGOGH;
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_MGLS |
AMD_CG_SUPPORT_GFX_CP_LS |
@ -1411,6 +1440,12 @@ static int nv_common_hw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->nbio.funcs->apply_lc_spc_mode_wa)
adev->nbio.funcs->apply_lc_spc_mode_wa(adev);
if (adev->nbio.funcs->apply_l1_link_width_reconfig_wa)
adev->nbio.funcs->apply_l1_link_width_reconfig_wa(adev);
/* enable pcie gen2/3 link */
nv_pcie_gen3_enable(adev);
/* enable aspm */


@ -144,7 +144,7 @@ static const struct soc15_reg_golden golden_settings_sdma_4_1[] = {
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003e0),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000)
};
@ -288,7 +288,7 @@ static const struct soc15_reg_golden golden_settings_sdma_4_3[] = {
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0x003fff07, 0x40000051),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003e0),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x03fbe1fe)
};
@ -1896,8 +1896,11 @@ static int sdma_v4_0_late_init(void *handle)
sdma_v4_0_setup_ulv(adev);
if (adev->sdma.funcs && adev->sdma.funcs->reset_ras_error_count)
adev->sdma.funcs->reset_ras_error_count(adev);
if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
if (adev->sdma.funcs &&
adev->sdma.funcs->reset_ras_error_count)
adev->sdma.funcs->reset_ras_error_count(adev);
}
if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init)
return adev->sdma.funcs->ras_late_init(adev, &ih_info);


@ -43,9 +43,12 @@ static void smuio_v11_0_update_rom_clock_gating(struct amdgpu_device *adev, bool
if (adev->flags & AMD_IS_APU)
return;
if (!(adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG))
return;
def = data = RREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG))
if (enable)
data &= ~(CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK |
CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK);
else


@ -1360,10 +1360,7 @@ static int soc15_common_early_init(void *handle)
break;
case CHIP_RAVEN:
adev->asic_funcs = &soc15_asic_funcs;
if (adev->pdev->device == 0x15dd)
adev->apu_flags |= AMD_APU_IS_RAVEN;
if (adev->pdev->device == 0x15d8)
adev->apu_flags |= AMD_APU_IS_PICASSO;
if (adev->rev_id >= 0x8)
adev->apu_flags |= AMD_APU_IS_RAVEN2;
@ -1455,11 +1452,6 @@ static int soc15_common_early_init(void *handle)
break;
case CHIP_RENOIR:
adev->asic_funcs = &soc15_asic_funcs;
if ((adev->pdev->device == 0x1636) ||
(adev->pdev->device == 0x164c))
adev->apu_flags |= AMD_APU_IS_RENOIR;
else
adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
if (adev->apu_flags & AMD_APU_IS_RENOIR)
adev->external_rev_id = adev->rev_id + 0x91;


@ -218,7 +218,8 @@ svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn)
struct page *page;
page = pfn_to_page(pfn);
page->zone_device_data = prange;
svm_range_bo_ref(prange->svm_bo);
page->zone_device_data = prange->svm_bo;
get_page(page);
lock_page(page);
}
@ -293,15 +294,13 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
for (i = j = 0; i < npages; i++) {
struct page *spage;
dst[i] = cursor.start + (j << PAGE_SHIFT);
migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
svm_migrate_get_vram_page(prange, migrate->dst[i]);
migrate->dst[i] = migrate_pfn(migrate->dst[i]);
migrate->dst[i] |= MIGRATE_PFN_LOCKED;
if (migrate->src[i] & MIGRATE_PFN_VALID) {
spage = migrate_pfn_to_page(migrate->src[i]);
spage = migrate_pfn_to_page(migrate->src[i]);
if (spage && !is_zone_device_page(spage)) {
dst[i] = cursor.start + (j << PAGE_SHIFT);
migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
svm_migrate_get_vram_page(prange, migrate->dst[i]);
migrate->dst[i] = migrate_pfn(migrate->dst[i]);
migrate->dst[i] |= MIGRATE_PFN_LOCKED;
src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
DMA_TO_DEVICE);
r = dma_mapping_error(dev, src[i]);
@ -355,6 +354,20 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
}
}
#ifdef DEBUG_FORCE_MIXED_DOMAINS
for (i = 0, j = 0; i < npages; i += 4, j++) {
if (j & 1)
continue;
svm_migrate_put_vram_page(adev, dst[i]);
migrate->dst[i] = 0;
svm_migrate_put_vram_page(adev, dst[i + 1]);
migrate->dst[i + 1] = 0;
svm_migrate_put_vram_page(adev, dst[i + 2]);
migrate->dst[i + 2] = 0;
svm_migrate_put_vram_page(adev, dst[i + 3]);
migrate->dst[i + 3] = 0;
}
#endif
out:
return r;
}
@ -365,20 +378,20 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
uint64_t end)
{
uint64_t npages = (end - start) >> PAGE_SHIFT;
struct kfd_process_device *pdd;
struct dma_fence *mfence = NULL;
struct migrate_vma migrate;
dma_addr_t *scratch;
size_t size;
void *buf;
int r = -ENOMEM;
int retry = 0;
memset(&migrate, 0, sizeof(migrate));
migrate.vma = vma;
migrate.start = start;
migrate.end = end;
migrate.flags = MIGRATE_VMA_SELECT_SYSTEM;
migrate.pgmap_owner = adev;
migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);
size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
size *= npages;
@ -390,7 +403,6 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
migrate.dst = migrate.src + npages;
scratch = (dma_addr_t *)(migrate.dst + npages);
retry:
r = migrate_vma_setup(&migrate);
if (r) {
pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n",
@ -398,17 +410,9 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
goto out_free;
}
if (migrate.cpages != npages) {
pr_debug("collect 0x%lx/0x%llx pages, retry\n", migrate.cpages,
pr_debug("Partial migration. 0x%lx/0x%llx pages can be migrated\n",
migrate.cpages,
npages);
migrate_vma_finalize(&migrate);
if (retry++ >= 3) {
r = -ENOMEM;
pr_debug("failed %d migrate svms 0x%p [0x%lx 0x%lx]\n",
r, prange->svms, prange->start, prange->last);
goto out_free;
}
goto retry;
}
if (migrate.cpages) {
@ -425,6 +429,12 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
out_free:
kvfree(buf);
out:
if (!r) {
pdd = svm_range_get_pdd_by_adev(prange, adev);
if (pdd)
WRITE_ONCE(pdd->page_in, pdd->page_in + migrate.cpages);
}
return r;
}
@ -464,7 +474,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
prange->start, prange->last, best_loc);
/* FIXME: workaround for page locking bug with invalid pages */
svm_range_prefault(prange, mm);
svm_range_prefault(prange, mm, SVM_ADEV_PGMAP_OWNER(adev));
start = prange->start << PAGE_SHIFT;
end = (prange->last + 1) << PAGE_SHIFT;
@ -493,15 +503,19 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
static void svm_migrate_page_free(struct page *page)
{
/* Keep this function to avoid warning */
struct svm_range_bo *svm_bo = page->zone_device_data;
if (svm_bo) {
pr_debug("svm_bo ref left: %d\n", kref_read(&svm_bo->kref));
svm_range_bo_unref(svm_bo);
}
}
static int
svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
struct migrate_vma *migrate, struct dma_fence **mfence,
dma_addr_t *scratch)
dma_addr_t *scratch, uint64_t npages)
{
uint64_t npages = migrate->cpages;
struct device *dev = adev->dev;
uint64_t *src;
dma_addr_t *dst;
@ -518,15 +532,23 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
src = (uint64_t *)(scratch + npages);
dst = scratch;
for (i = 0, j = 0; i < npages; i++, j++, addr += PAGE_SIZE) {
for (i = 0, j = 0; i < npages; i++, addr += PAGE_SIZE) {
struct page *spage;
spage = migrate_pfn_to_page(migrate->src[i]);
if (!spage) {
pr_debug("failed get spage svms 0x%p [0x%lx 0x%lx]\n",
if (!spage || !is_zone_device_page(spage)) {
pr_debug("invalid page. Could be in CPU already svms 0x%p [0x%lx 0x%lx]\n",
prange->svms, prange->start, prange->last);
r = -ENOMEM;
goto out_oom;
if (j) {
r = svm_migrate_copy_memory_gart(adev, dst + i - j,
src + i - j, j,
FROM_VRAM_TO_RAM,
mfence);
if (r)
goto out_oom;
j = 0;
}
continue;
}
src[i] = svm_migrate_addr(adev, spage);
if (i > 0 && src[i] != src[i - 1] + PAGE_SIZE) {
@ -559,6 +581,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
migrate->dst[i] = migrate_pfn(page_to_pfn(dpage));
migrate->dst[i] |= MIGRATE_PFN_LOCKED;
j++;
}
r = svm_migrate_copy_memory_gart(adev, dst + i - j, src + i - j, j,
@ -581,6 +604,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
struct vm_area_struct *vma, uint64_t start, uint64_t end)
{
uint64_t npages = (end - start) >> PAGE_SHIFT;
struct kfd_process_device *pdd;
struct dma_fence *mfence = NULL;
struct migrate_vma migrate;
dma_addr_t *scratch;
@ -593,7 +617,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
migrate.start = start;
migrate.end = end;
migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
migrate.pgmap_owner = adev;
migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);
size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
size *= npages;
@ -616,7 +640,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
if (migrate.cpages) {
r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
scratch);
scratch, npages);
migrate_vma_pages(&migrate);
svm_migrate_copy_done(adev, mfence);
migrate_vma_finalize(&migrate);
@ -630,6 +654,12 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
out_free:
kvfree(buf);
out:
if (!r) {
pdd = svm_range_get_pdd_by_adev(prange, adev);
if (pdd)
WRITE_ONCE(pdd->page_out,
pdd->page_out + migrate.cpages);
}
return r;
}
@ -859,7 +889,7 @@ int svm_migrate_init(struct amdgpu_device *adev)
pgmap->range.start = res->start;
pgmap->range.end = res->end;
pgmap->ops = &svm_migrate_pgmap_ops;
pgmap->owner = adev;
pgmap->owner = SVM_ADEV_PGMAP_OWNER(adev);
pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
r = devm_memremap_pages(adev->dev, pgmap);
if (IS_ERR(r)) {


@ -730,6 +730,15 @@ struct kfd_process_device {
* number of CU's a device has along with number of other competing processes
*/
struct attribute attr_cu_occupancy;
/* sysfs counters for GPU retry fault and page migration tracking */
struct kobject *kobj_counters;
struct attribute attr_faults;
struct attribute attr_page_in;
struct attribute attr_page_out;
uint64_t faults;
uint64_t page_in;
uint64_t page_out;
};
#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)


@ -416,6 +416,29 @@ static ssize_t kfd_procfs_stats_show(struct kobject *kobj,
return 0;
}
static ssize_t kfd_sysfs_counters_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
struct kfd_process_device *pdd;
if (!strcmp(attr->name, "faults")) {
pdd = container_of(attr, struct kfd_process_device,
attr_faults);
return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->faults));
}
if (!strcmp(attr->name, "page_in")) {
pdd = container_of(attr, struct kfd_process_device,
attr_page_in);
return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->page_in));
}
if (!strcmp(attr->name, "page_out")) {
pdd = container_of(attr, struct kfd_process_device,
attr_page_out);
return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->page_out));
}
return 0;
}
static struct attribute attr_queue_size = {
.name = "size",
.mode = KFD_SYSFS_FILE_MODE
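
kfd_sysfs_counters_show recovers the per-device struct from a bare attribute pointer with container_of, then dispatches on the attribute name. The same idiom, self-contained in userspace — container_of is defined locally here, whereas the kernel provides its own:

    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct attribute { const char *name; };

    struct process_device {
        unsigned long long faults, page_in, page_out;
        struct attribute attr_faults, attr_page_in, attr_page_out;
    };

    static unsigned long long counters_show(struct attribute *attr)
    {
        struct process_device *pdd;

        if (!strcmp(attr->name, "faults")) {
            pdd = container_of(attr, struct process_device, attr_faults);
            return pdd->faults;
        }
        if (!strcmp(attr->name, "page_in")) {
            pdd = container_of(attr, struct process_device, attr_page_in);
            return pdd->page_in;
        }
        pdd = container_of(attr, struct process_device, attr_page_out);
        return pdd->page_out;
    }

    int main(void)
    {
        struct process_device pdd = { .faults = 3, .page_in = 7, .page_out = 1 };

        pdd.attr_faults.name   = "faults";
        pdd.attr_page_in.name  = "page_in";
        pdd.attr_page_out.name = "page_out";
        printf("%llu\n", counters_show(&pdd.attr_page_in));  /* prints 7 */
        return 0;
    }
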
@ -451,13 +474,18 @@ static const struct sysfs_ops procfs_stats_ops = {
.show = kfd_procfs_stats_show,
};
static struct attribute *procfs_stats_attrs[] = {
NULL
};
static struct kobj_type procfs_stats_type = {
.sysfs_ops = &procfs_stats_ops,
.default_attrs = procfs_stats_attrs,
.release = kfd_procfs_kobj_release,
};
static const struct sysfs_ops sysfs_counters_ops = {
.show = kfd_sysfs_counters_show,
};
static struct kobj_type sysfs_counters_type = {
.sysfs_ops = &sysfs_counters_ops,
.release = kfd_procfs_kobj_release,
};
int kfd_procfs_add_queue(struct queue *q)
@ -484,34 +512,31 @@ int kfd_procfs_add_queue(struct queue *q)
return 0;
}
static int kfd_sysfs_create_file(struct kfd_process *p, struct attribute *attr,
static void kfd_sysfs_create_file(struct kobject *kobj, struct attribute *attr,
char *name)
{
int ret = 0;
int ret;
if (!p || !attr || !name)
return -EINVAL;
if (!kobj || !attr || !name)
return;
attr->name = name;
attr->mode = KFD_SYSFS_FILE_MODE;
sysfs_attr_init(attr);
ret = sysfs_create_file(p->kobj, attr);
return ret;
ret = sysfs_create_file(kobj, attr);
if (ret)
pr_warn("Create sysfs %s/%s failed %d", kobj->name, name, ret);
}
static int kfd_procfs_add_sysfs_stats(struct kfd_process *p)
static void kfd_procfs_add_sysfs_stats(struct kfd_process *p)
{
int ret = 0;
int ret;
int i;
char stats_dir_filename[MAX_SYSFS_FILENAME_LEN];
if (!p)
return -EINVAL;
if (!p->kobj)
return -EFAULT;
if (!p || !p->kobj)
return;
/*
* Create sysfs files for each GPU:
@ -521,63 +546,87 @@ static int kfd_procfs_add_sysfs_stats(struct kfd_process *p)
*/
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
struct kobject *kobj_stats;
snprintf(stats_dir_filename, MAX_SYSFS_FILENAME_LEN,
"stats_%u", pdd->dev->id);
kobj_stats = kfd_alloc_struct(kobj_stats);
if (!kobj_stats)
return -ENOMEM;
pdd->kobj_stats = kfd_alloc_struct(pdd->kobj_stats);
if (!pdd->kobj_stats)
return;
ret = kobject_init_and_add(kobj_stats,
&procfs_stats_type,
p->kobj,
stats_dir_filename);
ret = kobject_init_and_add(pdd->kobj_stats,
&procfs_stats_type,
p->kobj,
stats_dir_filename);
if (ret) {
pr_warn("Creating KFD proc/stats_%s folder failed",
stats_dir_filename);
kobject_put(kobj_stats);
goto err;
stats_dir_filename);
kobject_put(pdd->kobj_stats);
pdd->kobj_stats = NULL;
return;
}
pdd->kobj_stats = kobj_stats;
pdd->attr_evict.name = "evicted_ms";
pdd->attr_evict.mode = KFD_SYSFS_FILE_MODE;
sysfs_attr_init(&pdd->attr_evict);
ret = sysfs_create_file(kobj_stats, &pdd->attr_evict);
if (ret)
pr_warn("Creating eviction stats for gpuid %d failed",
(int)pdd->dev->id);
kfd_sysfs_create_file(pdd->kobj_stats, &pdd->attr_evict,
"evicted_ms");
/* Add sysfs file to report compute unit occupancy */
if (pdd->dev->kfd2kgd->get_cu_occupancy != NULL) {
pdd->attr_cu_occupancy.name = "cu_occupancy";
pdd->attr_cu_occupancy.mode = KFD_SYSFS_FILE_MODE;
sysfs_attr_init(&pdd->attr_cu_occupancy);
ret = sysfs_create_file(kobj_stats,
&pdd->attr_cu_occupancy);
if (ret)
pr_warn("Creating %s failed for gpuid: %d",
pdd->attr_cu_occupancy.name,
(int)pdd->dev->id);
}
if (pdd->dev->kfd2kgd->get_cu_occupancy)
kfd_sysfs_create_file(pdd->kobj_stats,
&pdd->attr_cu_occupancy,
"cu_occupancy");
}
err:
return ret;
}
static int kfd_procfs_add_sysfs_files(struct kfd_process *p)
static void kfd_procfs_add_sysfs_counters(struct kfd_process *p)
{
int ret = 0;
int i;
char counters_dir_filename[MAX_SYSFS_FILENAME_LEN];
if (!p)
return -EINVAL;
if (!p || !p->kobj)
return;
if (!p->kobj)
return -EFAULT;
/*
* Create sysfs files for each GPU which supports SVM
* - proc/<pid>/counters_<gpuid>/
* - proc/<pid>/counters_<gpuid>/faults
* - proc/<pid>/counters_<gpuid>/page_in
* - proc/<pid>/counters_<gpuid>/page_out
*/
for_each_set_bit(i, p->svms.bitmap_supported, p->n_pdds) {
struct kfd_process_device *pdd = p->pdds[i];
struct kobject *kobj_counters;
snprintf(counters_dir_filename, MAX_SYSFS_FILENAME_LEN,
"counters_%u", pdd->dev->id);
kobj_counters = kfd_alloc_struct(kobj_counters);
if (!kobj_counters)
return;
ret = kobject_init_and_add(kobj_counters, &sysfs_counters_type,
p->kobj, counters_dir_filename);
if (ret) {
pr_warn("Creating KFD proc/%s folder failed",
counters_dir_filename);
kobject_put(kobj_counters);
return;
}
pdd->kobj_counters = kobj_counters;
kfd_sysfs_create_file(kobj_counters, &pdd->attr_faults,
"faults");
kfd_sysfs_create_file(kobj_counters, &pdd->attr_page_in,
"page_in");
kfd_sysfs_create_file(kobj_counters, &pdd->attr_page_out,
"page_out");
}
}
static void kfd_procfs_add_sysfs_files(struct kfd_process *p)
{
int i;
if (!p || !p->kobj)
return;
/*
* Create sysfs files for each GPU:
@ -589,20 +638,14 @@ static int kfd_procfs_add_sysfs_files(struct kfd_process *p)
snprintf(pdd->vram_filename, MAX_SYSFS_FILENAME_LEN, "vram_%u",
pdd->dev->id);
ret = kfd_sysfs_create_file(p, &pdd->attr_vram, pdd->vram_filename);
if (ret)
pr_warn("Creating vram usage for gpu id %d failed",
(int)pdd->dev->id);
kfd_sysfs_create_file(p->kobj, &pdd->attr_vram,
pdd->vram_filename);
snprintf(pdd->sdma_filename, MAX_SYSFS_FILENAME_LEN, "sdma_%u",
pdd->dev->id);
ret = kfd_sysfs_create_file(p, &pdd->attr_sdma, pdd->sdma_filename);
if (ret)
pr_warn("Creating sdma usage for gpu id %d failed",
(int)pdd->dev->id);
kfd_sysfs_create_file(p->kobj, &pdd->attr_sdma,
pdd->sdma_filename);
}
return ret;
}
void kfd_procfs_del_queue(struct queue *q)
@ -800,28 +843,17 @@ struct kfd_process *kfd_create_process(struct file *filep)
goto out;
}
process->attr_pasid.name = "pasid";
process->attr_pasid.mode = KFD_SYSFS_FILE_MODE;
sysfs_attr_init(&process->attr_pasid);
ret = sysfs_create_file(process->kobj, &process->attr_pasid);
if (ret)
pr_warn("Creating pasid for pid %d failed",
(int)process->lead_thread->pid);
kfd_sysfs_create_file(process->kobj, &process->attr_pasid,
"pasid");
process->kobj_queues = kobject_create_and_add("queues",
process->kobj);
if (!process->kobj_queues)
pr_warn("Creating KFD proc/queues folder failed");
ret = kfd_procfs_add_sysfs_stats(process);
if (ret)
pr_warn("Creating sysfs stats dir for pid %d failed",
(int)process->lead_thread->pid);
ret = kfd_procfs_add_sysfs_files(process);
if (ret)
pr_warn("Creating sysfs usage file for pid %d failed",
(int)process->lead_thread->pid);
kfd_procfs_add_sysfs_stats(process);
kfd_procfs_add_sysfs_files(process);
kfd_procfs_add_sysfs_counters(process);
}
out:
if (!IS_ERR(process))
@ -964,6 +996,50 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
p->n_pdds = 0;
}
static void kfd_process_remove_sysfs(struct kfd_process *p)
{
struct kfd_process_device *pdd;
int i;
if (!p->kobj)
return;
sysfs_remove_file(p->kobj, &p->attr_pasid);
kobject_del(p->kobj_queues);
kobject_put(p->kobj_queues);
p->kobj_queues = NULL;
for (i = 0; i < p->n_pdds; i++) {
pdd = p->pdds[i];
sysfs_remove_file(p->kobj, &pdd->attr_vram);
sysfs_remove_file(p->kobj, &pdd->attr_sdma);
sysfs_remove_file(pdd->kobj_stats, &pdd->attr_evict);
if (pdd->dev->kfd2kgd->get_cu_occupancy)
sysfs_remove_file(pdd->kobj_stats,
&pdd->attr_cu_occupancy);
kobject_del(pdd->kobj_stats);
kobject_put(pdd->kobj_stats);
pdd->kobj_stats = NULL;
}
for_each_set_bit(i, p->svms.bitmap_supported, p->n_pdds) {
pdd = p->pdds[i];
sysfs_remove_file(pdd->kobj_counters, &pdd->attr_faults);
sysfs_remove_file(pdd->kobj_counters, &pdd->attr_page_in);
sysfs_remove_file(pdd->kobj_counters, &pdd->attr_page_out);
kobject_del(pdd->kobj_counters);
kobject_put(pdd->kobj_counters);
pdd->kobj_counters = NULL;
}
kobject_del(p->kobj);
kobject_put(p->kobj);
p->kobj = NULL;
}
/* No process locking is needed in this function, because the process
* is not findable any more. We must assume that no other thread is
* using it any more, otherwise we couldn't safely free the process
@ -973,33 +1049,7 @@ static void kfd_process_wq_release(struct work_struct *work)
{
struct kfd_process *p = container_of(work, struct kfd_process,
release_work);
int i;
/* Remove the procfs files */
if (p->kobj) {
sysfs_remove_file(p->kobj, &p->attr_pasid);
kobject_del(p->kobj_queues);
kobject_put(p->kobj_queues);
p->kobj_queues = NULL;
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
sysfs_remove_file(p->kobj, &pdd->attr_vram);
sysfs_remove_file(p->kobj, &pdd->attr_sdma);
sysfs_remove_file(p->kobj, &pdd->attr_evict);
if (pdd->dev->kfd2kgd->get_cu_occupancy != NULL)
sysfs_remove_file(p->kobj, &pdd->attr_cu_occupancy);
kobject_del(pdd->kobj_stats);
kobject_put(pdd->kobj_stats);
pdd->kobj_stats = NULL;
}
kobject_del(p->kobj);
kobject_put(p->kobj);
p->kobj = NULL;
}
kfd_process_remove_sysfs(p);
kfd_iommu_unbind_process(p);
kfd_process_free_outstanding_kfd_bos(p);


@ -153,6 +153,7 @@ void pqm_uninit(struct process_queue_manager *pqm)
if (pqn->q && pqn->q->gws)
amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
pqn->q->gws);
kfd_procfs_del_queue(pqn->q);
uninit_queue(pqn->q);
list_del(&pqn->process_queue_list);
kfree(pqn);


@ -119,28 +119,40 @@ static void svm_range_remove_notifier(struct svm_range *prange)
}
static int
svm_range_dma_map_dev(struct device *dev, dma_addr_t **dma_addr,
unsigned long *hmm_pfns, uint64_t npages)
svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
unsigned long *hmm_pfns, uint32_t gpuidx)
{
enum dma_data_direction dir = DMA_BIDIRECTIONAL;
dma_addr_t *addr = *dma_addr;
dma_addr_t *addr = prange->dma_addr[gpuidx];
struct device *dev = adev->dev;
struct page *page;
int i, r;
if (!addr) {
addr = kvmalloc_array(npages, sizeof(*addr),
addr = kvmalloc_array(prange->npages, sizeof(*addr),
GFP_KERNEL | __GFP_ZERO);
if (!addr)
return -ENOMEM;
*dma_addr = addr;
prange->dma_addr[gpuidx] = addr;
}
for (i = 0; i < npages; i++) {
for (i = 0; i < prange->npages; i++) {
if (WARN_ONCE(addr[i] && !dma_mapping_error(dev, addr[i]),
"leaking dma mapping\n"))
dma_unmap_page(dev, addr[i], PAGE_SIZE, dir);
page = hmm_pfn_to_page(hmm_pfns[i]);
if (is_zone_device_page(page)) {
struct amdgpu_device *bo_adev =
amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
addr[i] = (hmm_pfns[i] << PAGE_SHIFT) +
bo_adev->vm_manager.vram_base_offset -
bo_adev->kfd.dev->pgmap.range.start;
addr[i] |= SVM_RANGE_VRAM_DOMAIN;
pr_debug("vram address detected: 0x%llx\n", addr[i]);
continue;
}
addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir);
r = dma_mapping_error(dev, addr[i]);
if (r) {
@ -175,8 +187,7 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
}
adev = (struct amdgpu_device *)pdd->dev->kgd;
r = svm_range_dma_map_dev(adev->dev, &prange->dma_addr[gpuidx],
hmm_pfns, prange->npages);
r = svm_range_dma_map_dev(adev, prange, hmm_pfns, gpuidx);
if (r)
break;
}
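
The VRAM branch above tags the computed address with SVM_RANGE_VRAM_DOMAIN rather than carrying a separate per-page domain array: page-aligned addresses always have their low PAGE_SHIFT bits clear, so one of them is free to say "this page lives in VRAM", and the mapping loop strips it before use. The trick in isolation:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT       12
    #define VRAM_DOMAIN_FLAG 0x1ULL   /* stands in for SVM_RANGE_VRAM_DOMAIN */

    int main(void)
    {
        /* A page-aligned address has the low PAGE_SHIFT bits clear, so
         * bit 0 can smuggle the domain alongside the address itself. */
        uint64_t addr   = (uint64_t)0x1234 << PAGE_SHIFT;
        uint64_t tagged = addr | VRAM_DOMAIN_FLAG;

        printf("vram? %d  addr 0x%llx\n",
               (int)(tagged & VRAM_DOMAIN_FLAG),
               (unsigned long long)(tagged & ~VRAM_DOMAIN_FLAG));
        return 0;
    }
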
@ -301,14 +312,6 @@ static bool svm_bo_ref_unless_zero(struct svm_range_bo *svm_bo)
return true;
}
static struct svm_range_bo *svm_range_bo_ref(struct svm_range_bo *svm_bo)
{
if (svm_bo)
kref_get(&svm_bo->kref);
return svm_bo;
}
static void svm_range_bo_release(struct kref *kref)
{
struct svm_range_bo *svm_bo;
@ -347,7 +350,7 @@ static void svm_range_bo_release(struct kref *kref)
kfree(svm_bo);
}
static void svm_range_bo_unref(struct svm_range_bo *svm_bo)
void svm_range_bo_unref(struct svm_range_bo *svm_bo)
{
if (!svm_bo)
return;
@ -564,6 +567,24 @@ svm_range_get_adev_by_id(struct svm_range *prange, uint32_t gpu_id)
return (struct amdgpu_device *)pdd->dev->kgd;
}
struct kfd_process_device *
svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev)
{
struct kfd_process *p;
int32_t gpu_idx, gpuid;
int r;
p = container_of(prange->svms, struct kfd_process, svms);
r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpu_idx);
if (r) {
pr_debug("failed to get device id by adev %p\n", adev);
return NULL;
}
return kfd_process_device_from_gpuidx(p, gpu_idx);
}
static int svm_range_bo_validate(void *param, struct amdgpu_bo *bo)
{
struct ttm_operation_ctx ctx = { false, false };
@ -1002,21 +1023,22 @@ svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
}
static uint64_t
svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange)
svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange,
int domain)
{
struct amdgpu_device *bo_adev;
uint32_t flags = prange->flags;
uint32_t mapping_flags = 0;
uint64_t pte_flags;
bool snoop = !prange->ttm_res;
bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN);
bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT;
if (prange->svm_bo && prange->ttm_res)
if (domain == SVM_RANGE_VRAM_DOMAIN)
bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
switch (adev->asic_type) {
case CHIP_ARCTURUS:
if (prange->svm_bo && prange->ttm_res) {
if (domain == SVM_RANGE_VRAM_DOMAIN) {
if (bo_adev == adev) {
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
@ -1032,7 +1054,7 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange)
}
break;
case CHIP_ALDEBARAN:
- if (prange->svm_bo && prange->ttm_res) {
+ if (domain == SVM_RANGE_VRAM_DOMAIN) {
if (bo_adev == adev) {
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
@@ -1062,14 +1084,14 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange)
mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
pte_flags = AMDGPU_PTE_VALID;
- pte_flags |= prange->ttm_res ? 0 : AMDGPU_PTE_SYSTEM;
+ pte_flags |= (domain == SVM_RANGE_VRAM_DOMAIN) ? 0 : AMDGPU_PTE_SYSTEM;
pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags);
pr_debug("svms 0x%p [0x%lx 0x%lx] vram %d PTE 0x%llx mapping 0x%x\n",
prange->svms, prange->start, prange->last,
- prange->ttm_res ? 1:0, pte_flags, mapping_flags);
+ (domain == SVM_RANGE_VRAM_DOMAIN) ? 1:0, pte_flags, mapping_flags);
return pte_flags;
}
@@ -1140,31 +1162,41 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo_va bo_va;
bool table_freed = false;
uint64_t pte_flags;
+ unsigned long last_start;
+ int last_domain;
int r = 0;
+ int64_t i;
pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
prange->last);
- if (prange->svm_bo && prange->ttm_res) {
+ if (prange->svm_bo && prange->ttm_res)
bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev);
- prange->mapping.bo_va = &bo_va;
- }
- prange->mapping.start = prange->start;
- prange->mapping.last = prange->last;
- prange->mapping.offset = prange->ttm_res ? prange->offset : 0;
- pte_flags = svm_range_get_pte_flags(adev, prange);
+ last_start = prange->start;
+ for (i = 0; i < prange->npages; i++) {
+ last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN;
+ dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN;
+ if ((prange->start + i) < prange->last &&
+ last_domain == (dma_addr[i + 1] & SVM_RANGE_VRAM_DOMAIN))
+ continue;
- r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL,
- prange->mapping.start,
- prange->mapping.last, pte_flags,
- prange->mapping.offset,
- prange->ttm_res,
- dma_addr, &vm->last_update,
- &table_freed);
- if (r) {
- pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start);
- goto out;
+ pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n",
+ last_start, prange->start + i, last_domain ? "GPU" : "CPU");
+ pte_flags = svm_range_get_pte_flags(adev, prange, last_domain);
+ r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL,
+ last_start,
+ prange->start + i, pte_flags,
+ last_start - prange->start,
+ NULL,
+ dma_addr,
+ &vm->last_update,
+ &table_freed);
+ if (r) {
+ pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start);
+ goto out;
+ }
+ last_start = prange->start + i + 1;
}
r = amdgpu_vm_update_pdes(adev, vm, false);
@@ -1185,7 +1217,6 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
p->pasid, TLB_FLUSH_LEGACY);
}
out:
- prange->mapping.bo_va = NULL;
return r;
}
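The loop above is the heart of the mixed-domain mapping change: instead of one amdgpu_vm_bo_update_mapping() call covering the whole range, pages are scanned once and a GPU page-table update is issued only when the VRAM/system domain bit flips or the range ends, so every maximal run of same-domain pages becomes a single call. A minimal standalone C sketch of that batching idea (flag value and names are illustrative, not the driver's):

#include <stdint.h>
#include <stdio.h>

#define VRAM_FLAG 0x1ULL /* stands in for SVM_RANGE_VRAM_DOMAIN */

/* Issue one "mapping" per maximal run of pages sharing the domain bit. */
static void map_in_runs(uint64_t *addr, unsigned long npages)
{
	unsigned long run_start = 0, i;

	for (i = 0; i < npages; i++) {
		uint64_t domain = addr[i] & VRAM_FLAG;

		addr[i] &= ~VRAM_FLAG; /* the flag is bookkeeping, not address bits */
		if (i + 1 < npages && domain == (addr[i + 1] & VRAM_FLAG))
			continue; /* run continues */

		printf("map pages [%lu..%lu] on %s\n",
		       run_start, i, domain ? "GPU" : "CPU");
		run_start = i + 1;
	}
}

int main(void)
{
	uint64_t pages[] = { 0x1000 | VRAM_FLAG, 0x2000 | VRAM_FLAG,
			     0x3000, 0x4000, 0x5000 | VRAM_FLAG };

	map_in_runs(pages, sizeof(pages) / sizeof(pages[0]));
	return 0;
}

With the sample input this emits three mappings: pages 0-1 on the GPU, 2-3 on the CPU, and 4 on the GPU, which is exactly the coalescing the kernel loop performs per domain run.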
@@ -1319,6 +1350,17 @@ static void svm_range_unreserve_bos(struct svm_validate_context *ctx)
ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list);
}
+ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
+ {
+ struct kfd_process_device *pdd;
+ struct amdgpu_device *adev;
+ pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+ adev = (struct amdgpu_device *)pdd->dev->kgd;
+ return SVM_ADEV_PGMAP_OWNER(adev);
+ }
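kfd_svm_page_owner() feeds the owner-consistency check added to svm_range_validate_and_map() below: the pgmap owner of the first GPU in the access bitmap is taken as a candidate, and if any other selected GPU reports a different owner the candidate collapses to NULL, so amdgpu_hmm_range_get_pages() faults everything to system memory instead of trusting device-private pages it cannot interpret. A hedged userspace sketch of that reduction (plain arrays stand in for the kernel's bitmap helpers, and owner tokens are assumed non-NULL):

#include <stddef.h>
#include <stdio.h>

/* Return the owner shared by all selected devices, or NULL if mixed. */
static void *common_owner(void *owners[], const int selected[], size_t n)
{
	void *owner = NULL;
	size_t i;

	for (i = 0; i < n; i++) {
		if (!selected[i])
			continue;
		if (!owner)
			owner = owners[i];   /* first selected device */
		else if (owners[i] != owner)
			return NULL;         /* mixed owners: be conservative */
	}
	return owner;
}

int main(void)
{
	int hive_a, hive_b;                  /* two distinct owner tokens */
	void *owners[] = { &hive_a, &hive_a, &hive_b };
	int all[] = { 1, 1, 1 }, first_two[] = { 1, 1, 0 };

	printf("all three: %p\n", common_owner(owners, all, 3));       /* NULL */
	printf("first two: %p\n", common_owner(owners, first_two, 3)); /* &hive_a */
	return 0;
}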
/*
* Validation+GPU mapping with concurrent invalidation (MMU notifiers)
*
@@ -1349,6 +1391,9 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
{
struct svm_validate_context ctx;
struct hmm_range *hmm_range;
+ struct kfd_process *p;
+ void *owner;
+ int32_t idx;
int r = 0;
ctx.process = container_of(prange->svms, struct kfd_process, svms);
@@ -1394,33 +1439,38 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
svm_range_reserve_bos(&ctx);
- if (!prange->actual_loc) {
- r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
- prange->start << PAGE_SHIFT,
- prange->npages, &hmm_range,
- false, true);
- if (r) {
- pr_debug("failed %d to get svm range pages\n", r);
- goto unreserve_out;
+ p = container_of(prange->svms, struct kfd_process, svms);
+ owner = kfd_svm_page_owner(p, find_first_bit(ctx.bitmap,
+ MAX_GPU_INSTANCE));
+ for_each_set_bit(idx, ctx.bitmap, MAX_GPU_INSTANCE) {
+ if (kfd_svm_page_owner(p, idx) != owner) {
+ owner = NULL;
+ break;
+ }
+ }
- r = svm_range_dma_map(prange, ctx.bitmap,
- hmm_range->hmm_pfns);
- if (r) {
- pr_debug("failed %d to dma map range\n", r);
- goto unreserve_out;
- }
- prange->validated_once = true;
- }
+ r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
+ prange->start << PAGE_SHIFT,
+ prange->npages, &hmm_range,
+ false, true, owner);
+ if (r) {
+ pr_debug("failed %d to get svm range pages\n", r);
+ goto unreserve_out;
+ }
+ r = svm_range_dma_map(prange, ctx.bitmap,
+ hmm_range->hmm_pfns);
+ if (r) {
+ pr_debug("failed %d to dma map range\n", r);
+ goto unreserve_out;
+ }
+ prange->validated_once = true;
svm_range_lock(prange);
- if (!prange->actual_loc) {
- if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
- pr_debug("hmm update the range, need validate again\n");
- r = -EAGAIN;
- goto unlock_out;
- }
+ if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
+ pr_debug("hmm update the range, need validate again\n");
+ r = -EAGAIN;
+ goto unlock_out;
}
if (!list_empty(&prange->child_list)) {
pr_debug("range split by unmap in parallel, validate again\n");
@@ -1572,6 +1622,7 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
unsigned long start, unsigned long last)
{
struct svm_range_list *svms = prange->svms;
+ struct svm_range *pchild;
struct kfd_process *p;
int r = 0;
@@ -1583,7 +1634,19 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
if (!p->xnack_enabled) {
int evicted_ranges;
- atomic_inc(&prange->invalid);
+ list_for_each_entry(pchild, &prange->child_list, child_list) {
+ mutex_lock_nested(&pchild->lock, 1);
+ if (pchild->start <= last && pchild->last >= start) {
+ pr_debug("increment pchild invalid [0x%lx 0x%lx]\n",
+ pchild->start, pchild->last);
+ atomic_inc(&pchild->invalid);
+ }
+ mutex_unlock(&pchild->lock);
+ }
+ if (prange->start <= last && prange->last >= start)
+ atomic_inc(&prange->invalid);
evicted_ranges = atomic_inc_return(&svms->evicted_ranges);
if (evicted_ranges != 1)
return r;
@@ -1600,7 +1663,6 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
schedule_delayed_work(&svms->restore_work,
msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
} else {
- struct svm_range *pchild;
unsigned long s, l;
pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n",
@@ -2311,6 +2373,27 @@ static bool svm_range_skip_recover(struct svm_range *prange)
return false;
}
+ static void
+ svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p,
+ struct svm_range *prange, int32_t gpuidx)
+ {
+ struct kfd_process_device *pdd;
+ if (gpuidx == MAX_GPU_INSTANCE)
+ /* fault is on different page of same range
+ * or fault is skipped to recover later
+ */
+ pdd = svm_range_get_pdd_by_adev(prange, adev);
+ else
+ /* fault recovered
+ * or fault cannot recover because GPU no access on the range
+ */
+ pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+ if (pdd)
+ WRITE_ONCE(pdd->faults, pdd->faults + 1);
+ }
int
svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
uint64_t addr)
@@ -2320,7 +2403,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
struct svm_range *prange;
struct kfd_process *p;
uint64_t timestamp;
- int32_t best_loc, gpuidx;
+ int32_t best_loc;
+ int32_t gpuidx = MAX_GPU_INSTANCE;
bool write_locked = false;
int r = 0;
@@ -2440,6 +2524,9 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
out_unlock_svms:
mutex_unlock(&svms->lock);
mmap_read_unlock(mm);
+ svm_range_count_fault(adev, p, prange, gpuidx);
mmput(mm);
out:
kfd_unref_process(p);
@@ -2662,7 +2749,8 @@ svm_range_best_prefetch_location(struct svm_range *prange)
/* FIXME: This is a workaround for page locking bug when some pages are
 * invalid during migration to VRAM
 */
- void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm)
+ void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm,
+ void *owner)
{
struct hmm_range *hmm_range;
int r;
@@ -2673,7 +2761,7 @@ void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm)
r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
prange->start << PAGE_SHIFT,
prange->npages, &hmm_range,
- false, true);
+ false, true, owner);
if (!r) {
amdgpu_hmm_range_get_pages_done(hmm_range);
prange->validated_once = true;
@@ -2718,16 +2806,6 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
best_loc == prange->actual_loc)
return 0;
- /*
- * Prefetch to GPU without host access flag, set actual_loc to gpu, then
- * validate on gpu and map to gpus will be handled afterwards.
- */
- if (best_loc && !prange->actual_loc &&
- !(prange->flags & KFD_IOCTL_SVM_FLAG_HOST_ACCESS)) {
- prange->actual_loc = best_loc;
- return 0;
- }
if (!best_loc) {
r = svm_migrate_vram_to_ram(prange, mm);
*migrated = !r;


@@ -35,6 +35,10 @@
#include "amdgpu.h"
#include "kfd_priv.h"
+ #define SVM_RANGE_VRAM_DOMAIN (1UL << 0)
+ #define SVM_ADEV_PGMAP_OWNER(adev)\
+ ((adev)->hive ? (void *)(adev)->hive : (void *)(adev))
struct svm_range_bo {
struct amdgpu_bo *bo;
struct kref kref;
@@ -110,7 +114,6 @@ struct svm_range {
struct list_head update_list;
struct list_head remove_list;
struct list_head insert_list;
- struct amdgpu_bo_va_mapping mapping;
uint64_t npages;
dma_addr_t *dma_addr[MAX_GPU_INSTANCE];
struct ttm_resource *ttm_res;
@@ -147,6 +150,14 @@ static inline void svm_range_unlock(struct svm_range *prange)
mutex_unlock(&prange->lock);
}
+ static inline struct svm_range_bo *svm_range_bo_ref(struct svm_range_bo *svm_bo)
+ {
+ if (svm_bo)
+ kref_get(&svm_bo->kref);
+ return svm_bo;
+ }
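Moving svm_range_bo_ref() into the header as an inline pairs it with the newly exported svm_range_bo_unref(), so the migration code can hold a buffer reference in page->zone_device_data across the page's lifetime. A toy illustration of the get/put discipline (plain counter instead of the kernel's atomic kref; the real release callback frees the amdgpu BO):

#include <stdio.h>
#include <stdlib.h>

struct box {
	int refcount;
};

static struct box *box_ref(struct box *b)
{
	if (b)
		b->refcount++;        /* kref_get() analogue */
	return b;
}

static void box_unref(struct box *b)
{
	if (!b)
		return;
	if (--b->refcount == 0) {     /* kref_put() release analogue */
		printf("released\n");
		free(b);
	}
}

int main(void)
{
	struct box *b = calloc(1, sizeof(*b));

	b->refcount = 1;              /* creator's reference */
	box_ref(b);                   /* second user, e.g. zone_device_data */
	box_unref(b);
	box_unref(b);                 /* last put frees the object */
	return 0;
}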
int svm_range_list_init(struct kfd_process *p);
void svm_range_list_fini(struct kfd_process *p);
int svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
@@ -173,13 +184,17 @@ void schedule_deferred_list_work(struct svm_range_list *svms);
void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
unsigned long offset, unsigned long npages);
void svm_range_free_dma_mappings(struct svm_range *prange);
- void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm);
+ void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm,
+ void *owner);
+ struct kfd_process_device *
+ svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev);
/* SVM API and HMM page migration work together, device memory type
* is initialized to not 0 when page migration register device memory.
*/
#define KFD_IS_SVM_API_SUPPORTED(dev) ((dev)->pgmap.type != 0)
+ void svm_range_bo_unref(struct svm_range_bo *svm_bo);
#else
struct kfd_process;


@@ -1160,6 +1160,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
if (amdgpu_dc_feature_mask & DC_DISABLE_FRACTIONAL_PWM_MASK)
init_data.flags.disable_fractional_pwm = true;
+ if (amdgpu_dc_feature_mask & DC_EDP_NO_POWER_SEQUENCING)
+ init_data.flags.edp_no_power_sequencing = true;
init_data.flags.power_down_display_on_boot = true;
INIT_LIST_HEAD(&adev->dm.da_list);
@@ -10118,7 +10121,7 @@ static int validate_overlay(struct drm_atomic_state *state)
int i;
struct drm_plane *plane;
struct drm_plane_state *new_plane_state;
- struct drm_plane_state *primary_state, *cursor_state, *overlay_state = NULL;
+ struct drm_plane_state *primary_state, *overlay_state = NULL;
/* Check if primary plane is contained inside overlay */
for_each_new_plane_in_state_reverse(state, plane, new_plane_state, i) {
@@ -10148,14 +10151,6 @@ static int validate_overlay(struct drm_atomic_state *state)
if (!primary_state->crtc)
return 0;
- /* check if cursor plane is enabled */
- cursor_state = drm_atomic_get_plane_state(state, overlay_state->crtc->cursor);
- if (IS_ERR(cursor_state))
- return PTR_ERR(cursor_state);
- if (drm_atomic_plane_disabling(plane->state, cursor_state))
- return 0;
/* Perform the bounds check to ensure the overlay plane covers the primary */
if (primary_state->crtc_x < overlay_state->crtc_x ||
primary_state->crtc_y < overlay_state->crtc_y ||


@@ -297,6 +297,7 @@ struct dc_config {
bool allow_seamless_boot_optimization;
bool power_down_display_on_boot;
bool edp_not_connected;
+ bool edp_no_power_sequencing;
bool force_enum_edp;
bool forced_clocks;
bool allow_lttpr_non_transparent_mode;

View file

@@ -1022,8 +1022,20 @@ void dce110_edp_backlight_control(
/* dc_service_sleep_in_milliseconds(50); */
/*edp 1.2*/
panel_instance = link->panel_cntl->inst;
- if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_ON)
- edp_receiver_ready_T7(link);
+ if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_ON) {
+ if (!link->dc->config.edp_no_power_sequencing)
+ /*
+ * Sometimes, DP receiver chip power-controlled externally by an
+ * Embedded Controller could be treated and used as eDP,
+ * if it drives mobile display. In this case,
+ * we shouldn't be doing power-sequencing, hence we can skip
+ * waiting for T7-ready.
+ */
+ edp_receiver_ready_T7(link);
+ else
+ DC_LOG_DC("edp_receiver_ready_T7 skipped\n");
+ }
if (ctx->dc->ctx->dmub_srv &&
ctx->dc->debug.dmub_command_table) {
@@ -1048,8 +1060,19 @@
dc_link_backlight_enable_aux(link, enable);
/*edp 1.2*/
- if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_OFF)
- edp_add_delay_for_T9(link);
+ if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_OFF) {
+ if (!link->dc->config.edp_no_power_sequencing)
+ /*
+ * Sometimes, DP receiver chip power-controlled externally by an
+ * Embedded Controller could be treated and used as eDP,
+ * if it drives mobile display. In this case,
+ * we shouldn't be doing power-sequencing, hence we can skip
+ * waiting for T9-ready.
+ */
+ edp_add_delay_for_T9(link);
+ else
+ DC_LOG_DC("edp_receiver_ready_T9 skipped\n");
+ }
if (!enable && link->dpcd_sink_ext_caps.bits.oled)
msleep(OLED_PRE_T11_DELAY);


@@ -28,6 +28,7 @@ endif
CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -mhard-float
endif
+ ifdef CONFIG_X86
ifdef IS_OLD_GCC
# Stack alignment mismatch, proceed with caution.
# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
@@ -36,6 +37,7 @@ CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -mpreferred-stack-boundary=4
else
CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -msse2
endif
+ endif
AMD_DAL_DCN31 = $(addprefix $(AMDDALPATH)/dc/dcn31/,$(DCN31))


@@ -50,6 +50,10 @@ dml_ccflags += -msse2
endif
endif
+ ifneq ($(CONFIG_FRAME_WARN),0)
+ frame_warn_flag := -Wframe-larger-than=2048
+ endif
CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags)
ifdef CONFIG_DRM_AMD_DC_DCN
@@ -60,9 +64,9 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_ccflags)
- CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) -Wframe-larger-than=2048
+ CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags)
- CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) -Wframe-larger-than=2048
+ CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_rcflags)


@@ -119,7 +119,7 @@ bool dal_irq_service_set(
dal_irq_service_ack(irq_service, source);
- if (info->funcs->set)
+ if (info->funcs && info->funcs->set)
return info->funcs->set(irq_service, info, enable);
dal_irq_service_set_generic(irq_service, info, enable);
@@ -153,7 +153,7 @@ bool dal_irq_service_ack(
return false;
}
- if (info->funcs->ack)
+ if (info->funcs && info->funcs->ack)
return info->funcs->ack(irq_service, info);
dal_irq_service_ack_generic(irq_service, info);


@@ -165,7 +165,7 @@ enum irq_type
};
#define DAL_VALID_IRQ_SRC_NUM(src) \
- ((src) <= DAL_IRQ_SOURCES_NUMBER && (src) > DC_IRQ_SOURCE_INVALID)
+ ((src) < DAL_IRQ_SOURCES_NUMBER && (src) > DC_IRQ_SOURCE_INVALID)
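DAL_IRQ_SOURCES_NUMBER is a count, so it is one past the largest valid index; with `<=`, src == DAL_IRQ_SOURCES_NUMBER passed the check and could index one element past tables sized by the count. The classic shape of the bug, as a standalone sketch:

#include <stdio.h>

enum irq_source { SRC_INVALID, SRC_A, SRC_B, SRC_NUMBER };

static const char *info[SRC_NUMBER] = { "invalid", "A", "B" };

/* With `<=`, src == SRC_NUMBER would pass and read info[SRC_NUMBER],
 * one element past the array; `<` rejects it. */
static int src_valid(int src)
{
	return src < SRC_NUMBER && src > SRC_INVALID;
}

int main(void)
{
	int src;

	for (src = SRC_INVALID; src <= SRC_NUMBER; src++)
		printf("src %d valid? %d\n", src, src_valid(src));
	return 0;
}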
/* Number of Page Flip IRQ Sources. */
#define DAL_PFLIP_IRQ_SRC_NUM \


@@ -352,3 +352,63 @@ uint32_t dmub_dcn31_get_current_time(struct dmub_srv *dmub)
{
return REG_READ(DMCUB_TIMER_CURRENT);
}
+ void dmub_dcn31_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data)
+ {
+ uint32_t is_dmub_enabled, is_soft_reset, is_sec_reset;
+ uint32_t is_traceport_enabled, is_cw0_enabled, is_cw6_enabled;
+ if (!dmub || !diag_data)
+ return;
+ memset(diag_data, 0, sizeof(*diag_data));
+ diag_data->dmcub_version = dmub->fw_version;
+ diag_data->scratch[0] = REG_READ(DMCUB_SCRATCH0);
+ diag_data->scratch[1] = REG_READ(DMCUB_SCRATCH1);
+ diag_data->scratch[2] = REG_READ(DMCUB_SCRATCH2);
+ diag_data->scratch[3] = REG_READ(DMCUB_SCRATCH3);
+ diag_data->scratch[4] = REG_READ(DMCUB_SCRATCH4);
+ diag_data->scratch[5] = REG_READ(DMCUB_SCRATCH5);
+ diag_data->scratch[6] = REG_READ(DMCUB_SCRATCH6);
+ diag_data->scratch[7] = REG_READ(DMCUB_SCRATCH7);
+ diag_data->scratch[8] = REG_READ(DMCUB_SCRATCH8);
+ diag_data->scratch[9] = REG_READ(DMCUB_SCRATCH9);
+ diag_data->scratch[10] = REG_READ(DMCUB_SCRATCH10);
+ diag_data->scratch[11] = REG_READ(DMCUB_SCRATCH11);
+ diag_data->scratch[12] = REG_READ(DMCUB_SCRATCH12);
+ diag_data->scratch[13] = REG_READ(DMCUB_SCRATCH13);
+ diag_data->scratch[14] = REG_READ(DMCUB_SCRATCH14);
+ diag_data->scratch[15] = REG_READ(DMCUB_SCRATCH15);
+ diag_data->undefined_address_fault_addr = REG_READ(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR);
+ diag_data->inst_fetch_fault_addr = REG_READ(DMCUB_INST_FETCH_FAULT_ADDR);
+ diag_data->data_write_fault_addr = REG_READ(DMCUB_DATA_WRITE_FAULT_ADDR);
+ diag_data->inbox1_rptr = REG_READ(DMCUB_INBOX1_RPTR);
+ diag_data->inbox1_wptr = REG_READ(DMCUB_INBOX1_WPTR);
+ diag_data->inbox1_size = REG_READ(DMCUB_INBOX1_SIZE);
+ diag_data->inbox0_rptr = REG_READ(DMCUB_INBOX0_RPTR);
+ diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR);
+ diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE);
+ REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled);
+ diag_data->is_dmcub_enabled = is_dmub_enabled;
+ REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &is_soft_reset);
+ diag_data->is_dmcub_soft_reset = is_soft_reset;
+ REG_GET(DMCUB_SEC_CNTL, DMCUB_SEC_RESET_STATUS, &is_sec_reset);
+ diag_data->is_dmcub_secure_reset = is_sec_reset;
+ REG_GET(DMCUB_CNTL, DMCUB_TRACEPORT_EN, &is_traceport_enabled);
+ diag_data->is_traceport_en = is_traceport_enabled;
+ REG_GET(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_ENABLE, &is_cw0_enabled);
+ diag_data->is_cw0_enabled = is_cw0_enabled;
+ REG_GET(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_ENABLE, &is_cw6_enabled);
+ diag_data->is_cw6_enabled = is_cw6_enabled;
+ }
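The REG_GET() calls above read a register once and extract one named field from it. As a generic illustration of what such mask/shift accessors reduce to (not DMUB's actual macro expansion):

#include <stdint.h>
#include <stdio.h>

/* Extract a `width`-bit field starting at bit `shift` of a register value. */
static uint32_t field_get(uint32_t reg, unsigned int shift, unsigned int width)
{
	uint32_t mask = (width < 32) ? ((1u << width) - 1u) : ~0u;

	return (reg >> shift) & mask;
}

int main(void)
{
	uint32_t cntl = 0x00000005; /* say bit 0 = enable, bit 2 = traceport */

	printf("enable=%u traceport=%u\n",
	       field_get(cntl, 0, 1), field_get(cntl, 2, 1));
	return 0;
}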


@@ -36,6 +36,9 @@ struct dmub_srv;
DMUB_SR(DMCUB_CNTL) \
DMUB_SR(DMCUB_CNTL2) \
DMUB_SR(DMCUB_SEC_CNTL) \
+ DMUB_SR(DMCUB_INBOX0_SIZE) \
+ DMUB_SR(DMCUB_INBOX0_RPTR) \
+ DMUB_SR(DMCUB_INBOX0_WPTR) \
DMUB_SR(DMCUB_INBOX1_BASE_ADDRESS) \
DMUB_SR(DMCUB_INBOX1_SIZE) \
DMUB_SR(DMCUB_INBOX1_RPTR) \
@@ -103,11 +106,15 @@ struct dmub_srv;
DMUB_SR(DMCUB_SCRATCH14) \
DMUB_SR(DMCUB_SCRATCH15) \
DMUB_SR(DMCUB_GPINT_DATAIN1) \
+ DMUB_SR(DMCUB_GPINT_DATAOUT) \
DMUB_SR(CC_DC_PIPE_DIS) \
DMUB_SR(MMHUBBUB_SOFT_RESET) \
DMUB_SR(DCN_VM_FB_LOCATION_BASE) \
DMUB_SR(DCN_VM_FB_OFFSET) \
- DMUB_SR(DMCUB_TIMER_CURRENT)
+ DMUB_SR(DMCUB_TIMER_CURRENT) \
+ DMUB_SR(DMCUB_INST_FETCH_FAULT_ADDR) \
+ DMUB_SR(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR) \
+ DMUB_SR(DMCUB_DATA_WRITE_FAULT_ADDR)
#define DMUB_DCN31_FIELDS() \
DMUB_SF(DMCUB_CNTL, DMCUB_ENABLE) \
@@ -115,6 +122,7 @@ struct dmub_srv;
DMUB_SF(DMCUB_CNTL2, DMCUB_SOFT_RESET) \
DMUB_SF(DMCUB_SEC_CNTL, DMCUB_SEC_RESET) \
DMUB_SF(DMCUB_SEC_CNTL, DMCUB_MEM_UNIT_ID) \
+ DMUB_SF(DMCUB_SEC_CNTL, DMCUB_SEC_RESET_STATUS) \
DMUB_SF(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_TOP_ADDRESS) \
DMUB_SF(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_ENABLE) \
DMUB_SF(DMCUB_REGION3_CW1_TOP_ADDRESS, DMCUB_REGION3_CW1_TOP_ADDRESS) \
@@ -138,11 +146,13 @@ struct dmub_srv;
DMUB_SF(CC_DC_PIPE_DIS, DC_DMCUB_ENABLE) \
DMUB_SF(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET) \
DMUB_SF(DCN_VM_FB_LOCATION_BASE, FB_BASE) \
- DMUB_SF(DCN_VM_FB_OFFSET, FB_OFFSET)
+ DMUB_SF(DCN_VM_FB_OFFSET, FB_OFFSET) \
+ DMUB_SF(DMCUB_INBOX0_WPTR, DMCUB_INBOX0_WPTR)
struct dmub_srv_dcn31_reg_offset {
#define DMUB_SR(reg) uint32_t reg;
DMUB_DCN31_REGS()
+ DMCUB_INTERNAL_REGS()
#undef DMUB_SR
};
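The DMUB_SR()/DMUB_SF() lists are X-macros: the same register list is expanded under different definitions of DMUB_SR to generate struct fields here and offset or name tables elsewhere, which is why adding the fault-address registers above is a one-line change per list. A self-contained miniature of the pattern (illustrative names, not DMUB's):

#include <stdio.h>

/* X-macro list: each expansion of REG() decides what an entry becomes. */
#define EXAMPLE_REGS() \
	REG(CNTL)      \
	REG(STATUS)    \
	REG(SCRATCH0)

struct reg_offsets {
#define REG(r) unsigned int r;
	EXAMPLE_REGS()
#undef REG
};

static const char * const reg_names[] = {
#define REG(r) #r,
	EXAMPLE_REGS()
#undef REG
};

int main(void)
{
	printf("%zu registers, first is %s\n",
	       sizeof(struct reg_offsets) / sizeof(unsigned int), reg_names[0]);
	return 0;
}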
@@ -227,4 +237,6 @@ void dmub_dcn31_set_outbox0_rptr(struct dmub_srv *dmub, uint32_t rptr_offset);
uint32_t dmub_dcn31_get_current_time(struct dmub_srv *dmub);
+ void dmub_dcn31_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data);
#endif /* _DMUB_DCN31_H_ */


@@ -208,6 +208,7 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic)
break;
case DMUB_ASIC_DCN31:
+ dmub->regs_dcn31 = &dmub_srv_dcn31_regs;
funcs->reset = dmub_dcn31_reset;
funcs->reset_release = dmub_dcn31_reset_release;
funcs->backdoor_load = dmub_dcn31_backdoor_load;
@@ -231,9 +232,7 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic)
funcs->get_outbox0_wptr = dmub_dcn31_get_outbox0_wptr;
funcs->set_outbox0_rptr = dmub_dcn31_set_outbox0_rptr;
- if (asic == DMUB_ASIC_DCN31) {
- dmub->regs_dcn31 = &dmub_srv_dcn31_regs;
- }
+ funcs->get_diagnostic_data = dmub_dcn31_get_diagnostic_data;
funcs->get_current_time = dmub_dcn31_get_current_time;


@@ -223,10 +223,12 @@ enum amd_harvest_ip_mask {
};
enum DC_FEATURE_MASK {
- DC_FBC_MASK = 0x1,
- DC_MULTI_MON_PP_MCLK_SWITCH_MASK = 0x2,
- DC_DISABLE_FRACTIONAL_PWM_MASK = 0x4,
- DC_PSR_MASK = 0x8,
+ //Default value can be found at "uint amdgpu_dc_feature_mask"
+ DC_FBC_MASK = (1 << 0), //0x1, disabled by default
+ DC_MULTI_MON_PP_MCLK_SWITCH_MASK = (1 << 1), //0x2, enabled by default
+ DC_DISABLE_FRACTIONAL_PWM_MASK = (1 << 2), //0x4, disabled by default
+ DC_PSR_MASK = (1 << 3), //0x8, disabled by default
+ DC_EDP_NO_POWER_SEQUENCING = (1 << 4), //0x10, disabled by default
};
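Spelling the flags as (1 << n) makes the bit positions explicit and keeps new entries such as DC_EDP_NO_POWER_SEQUENCING from silently colliding. The mask is consumed exactly the way amdgpu_dm_init() does above; a standalone sketch of the same bit test (and, as the comment notes, the default lives in amdgpu_dc_feature_mask, which users would override via the amdgpu.dc_feature_mask module parameter):

#include <stdio.h>

enum feature_mask_example {
	FBC_MASK                      = (1 << 0),
	MULTI_MON_PP_MCLK_SWITCH_MASK = (1 << 1),
	DISABLE_FRACTIONAL_PWM_MASK   = (1 << 2),
	PSR_MASK                      = (1 << 3),
	EDP_NO_POWER_SEQUENCING       = (1 << 4),
};

int main(void)
{
	/* e.g. booting with amdgpu.dc_feature_mask=0x12 */
	unsigned int mask = MULTI_MON_PP_MCLK_SWITCH_MASK |
			    EDP_NO_POWER_SEQUENCING;

	if (mask & EDP_NO_POWER_SEQUENCING)
		printf("eDP power sequencing disabled (mask 0x%x)\n", mask);
	return 0;
}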
enum DC_DEBUG_MASK {


@@ -430,7 +430,7 @@ ARRAY_2D_DEPTH = 0x00000001,
*/
typedef enum ENUM_NUM_SIMD_PER_CU {
- NUM_SIMD_PER_CU = 0x00000004,
+ NUM_SIMD_PER_CU = 0x00000002,
} ENUM_NUM_SIMD_PER_CU;
/*


@@ -2902,14 +2902,15 @@ static ssize_t amdgpu_hwmon_show_power_cap_min(struct device *dev,
return sprintf(buf, "%i\n", 0);
}
- static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+ static ssize_t amdgpu_hwmon_show_power_cap_generic(struct device *dev,
+ struct device_attribute *attr,
+ char *buf,
+ enum pp_power_limit_level pp_limit_level)
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
enum pp_power_type power_type = to_sensor_dev_attr(attr)->index;
- enum pp_power_limit_level pp_limit_level = PP_PWR_LIMIT_MAX;
uint32_t limit;
ssize_t size;
int r;
@@ -2919,17 +2920,17 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
if (adev->in_suspend && !adev->in_runpm)
return -EPERM;
+ if ( !(pp_funcs && pp_funcs->get_power_limit))
+ return -ENODATA;
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (r < 0) {
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return r;
}
- if (pp_funcs && pp_funcs->get_power_limit)
- r = pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit,
- pp_limit_level, power_type);
- else
- r = -ENODATA;
+ r = pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit,
+ pp_limit_level, power_type);
if (!r)
size = sysfs_emit(buf, "%u\n", limit * 1000000);
@@ -2942,85 +2943,31 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
return size;
}
+ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+ {
+ return amdgpu_hwmon_show_power_cap_generic(dev, attr, buf, PP_PWR_LIMIT_MAX);
+ }
static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct amdgpu_device *adev = dev_get_drvdata(dev);
- const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
- enum pp_power_type power_type = to_sensor_dev_attr(attr)->index;
- enum pp_power_limit_level pp_limit_level = PP_PWR_LIMIT_CURRENT;
- uint32_t limit;
- ssize_t size;
- int r;
+ return amdgpu_hwmon_show_power_cap_generic(dev, attr, buf, PP_PWR_LIMIT_CURRENT);
- if (amdgpu_in_reset(adev))
- return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
- return -EPERM;
- r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
- if (r < 0) {
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- return r;
- }
- if (pp_funcs && pp_funcs->get_power_limit)
- r = pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit,
- pp_limit_level, power_type);
- else
- r = -ENODATA;
- if (!r)
- size = sysfs_emit(buf, "%u\n", limit * 1000000);
- else
- size = sysfs_emit(buf, "\n");
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- return size;
}
static ssize_t amdgpu_hwmon_show_power_cap_default(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct amdgpu_device *adev = dev_get_drvdata(dev);
- const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
- enum pp_power_type power_type = to_sensor_dev_attr(attr)->index;
- enum pp_power_limit_level pp_limit_level = PP_PWR_LIMIT_DEFAULT;
- uint32_t limit;
- ssize_t size;
- int r;
+ return amdgpu_hwmon_show_power_cap_generic(dev, attr, buf, PP_PWR_LIMIT_DEFAULT);
- if (amdgpu_in_reset(adev))
- return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
- return -EPERM;
- r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
- if (r < 0) {
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- return r;
- }
- if (pp_funcs && pp_funcs->get_power_limit)
- r = pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit,
- pp_limit_level, power_type);
- else
- r = -ENODATA;
- if (!r)
- size = sysfs_emit(buf, "%u\n", limit * 1000000);
- else
- size = sysfs_emit(buf, "\n");
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- return size;
}
static ssize_t amdgpu_hwmon_show_power_label(struct device *dev,
struct device_attribute *attr,
char *buf)


@@ -1453,10 +1453,14 @@ static int smu_hw_fini(void *handle)
if (smu->is_apu) {
smu_powergate_sdma(&adev->smu, true);
- smu_dpm_set_vcn_enable(smu, false);
- smu_dpm_set_jpeg_enable(smu, false);
}
+ smu_dpm_set_vcn_enable(smu, false);
+ smu_dpm_set_jpeg_enable(smu, false);
+ adev->vcn.cur_state = AMD_PG_STATE_GATE;
+ adev->jpeg.cur_state = AMD_PG_STATE_GATE;
if (!smu->pm_enabled)
return 0;


@@ -189,10 +189,11 @@ static int yellow_carp_init_smc_tables(struct smu_context *smu)
static int yellow_carp_system_features_control(struct smu_context *smu, bool en)
{
struct smu_feature *feature = &smu->smu_feature;
+ struct amdgpu_device *adev = smu->adev;
uint32_t feature_mask[2];
int ret = 0;
- if (!en)
+ if (!en && !adev->in_s0ix)
ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL);
bitmap_zero(feature->enabled, feature->feature_num);


@@ -352,6 +352,7 @@ static struct drm_framebuffer *psb_user_framebuffer_create
const struct drm_mode_fb_cmd2 *cmd)
{
struct drm_gem_object *obj;
+ struct drm_framebuffer *fb;
/*
* Find the GEM object and thus the gtt range object that is
@@ -362,7 +363,11 @@ static struct drm_framebuffer *psb_user_framebuffer_create
return ERR_PTR(-ENOENT);
/* Let the core code do all the work */
- return psb_framebuffer_create(dev, cmd, obj);
+ fb = psb_framebuffer_create(dev, cmd, obj);
+ if (IS_ERR(fb))
+ drm_gem_object_put(obj);
+ return fb;
}
static int psbfb_probe(struct drm_fb_helper *fb_helper,


@@ -1791,10 +1791,23 @@ static struct intel_shared_dpll *dg1_ddi_get_pll(struct intel_encoder *encoder)
{
struct drm_i915_private *i915 = to_i915(encoder->base.dev);
enum phy phy = intel_port_to_phy(i915, encoder->port);
+ enum intel_dpll_id id;
+ u32 val;
- return _cnl_ddi_get_pll(i915, DG1_DPCLKA_CFGCR0(phy),
- DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy),
- DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy));
+ val = intel_de_read(i915, DG1_DPCLKA_CFGCR0(phy));
+ val &= DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy);
+ val >>= DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy);
+ id = val;
+ /*
+ * _DG1_DPCLKA0_CFGCR0 maps between DPLL 0 and 1 with one bit for phy A
+ * and B while _DG1_DPCLKA1_CFGCR0 maps between DPLL 2 and 3 with one
+ * bit for phy C and D.
+ */
+ if (phy >= PHY_C)
+ id += DPLL_ID_DG1_DPLL2;
+ return intel_get_shared_dpll_by_id(i915, id);
}
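The readout now recovers the DPLL id in two steps: the CFGCR0 field selects one PLL of the phy's pair, and phys C/D shift the pair up by DPLL_ID_DG1_DPLL2. A worked table of the mapping (assuming DPLL_ID_DG1_DPLL2 == 2, i.e. the four DG1 PLLs are ids 0-3):

#include <stdio.h>

enum { PHY_A, PHY_B, PHY_C, PHY_D };
#define DPLL_ID_DG1_DPLL2 2 /* assumed offset of the second PLL pair */

static int dg1_dpll_id(int phy, unsigned int clk_sel_bit)
{
	int id = clk_sel_bit;            /* 0 or 1 within the phy's PLL pair */

	if (phy >= PHY_C)
		id += DPLL_ID_DG1_DPLL2; /* phys C/D use DPLL 2/3 */
	return id;
}

int main(void)
{
	int phy;

	for (phy = PHY_A; phy <= PHY_D; phy++)
		printf("phy %c: sel=0 -> DPLL%d, sel=1 -> DPLL%d\n",
		       'A' + phy, dg1_dpll_id(phy, 0), dg1_dpll_id(phy, 1));
	return 0;
}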
static void icl_ddi_combo_enable_clock(struct intel_encoder *encoder,


@@ -2868,7 +2868,7 @@ static int intel_dp_vsc_sdp_unpack(struct drm_dp_vsc_sdp *vsc,
if (size < sizeof(struct dp_sdp))
return -EINVAL;
- memset(vsc, 0, size);
+ memset(vsc, 0, sizeof(*vsc));
if (sdp->sdp_header.HB0 != 0)
return -EINVAL;
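The old call zeroed `size` bytes, the length of the incoming SDP buffer, through a pointer to the much smaller drm_dp_vsc_sdp, trampling whatever followed it; bounding the memset by the destination with sizeof(*vsc) is the safe idiom. A minimal demonstration with illustrative types (not the i915 structs):

#include <stdio.h>
#include <string.h>

struct small { int a, b; };            /* stands in for drm_dp_vsc_sdp */

int main(void)
{
	struct {
		struct small s;
		int sentinel;          /* neighbouring memory */
	} frame = { { 1, 2 }, 0x5a5a };
	size_t buf_size = sizeof(struct small) + sizeof(int); /* "size" argument */

	/* memset(&frame.s, 0, buf_size) would also wipe the sentinel. */
	memset(&frame.s, 0, sizeof(frame.s)); /* bounded by the destination */
	printf("buffer claims %zu bytes, struct holds %zu, sentinel=0x%x\n",
	       buf_size, sizeof(frame.s), frame.sentinel);
	return 0;
}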


@@ -1279,7 +1279,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
return true;
/* Waiting to drain ELSP? */
- synchronize_hardirq(to_pci_dev(engine->i915->drm.dev)->irq);
+ intel_synchronize_hardirq(engine->i915);
intel_engine_flush_submission(engine);
/* ELSP is empty, but there are ready requests? E.g. after reset */


@@ -184,8 +184,11 @@ static int xcs_resume(struct intel_engine_cs *engine)
ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n",
ring->head, ring->tail);
- /* Double check the ring is empty & disabled before we resume */
- synchronize_hardirq(engine->i915->drm.irq);
+ /*
+ * Double check the ring is empty & disabled before we resume. Called
+ * from atomic context during PCI probe, so _hardirq().
+ */
+ intel_synchronize_hardirq(engine->i915);
if (!stop_ring(engine))
goto err;


@@ -42,7 +42,6 @@
#include <drm/drm_aperture.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_ioctl.h>
- #include <drm/drm_irq.h>
#include <drm/drm_managed.h>
#include <drm/drm_probe_helper.h>


@@ -33,7 +33,6 @@
#include <linux/sysrq.h>
#include <drm/drm_drv.h>
- #include <drm/drm_irq.h>
#include "display/intel_de.h"
#include "display/intel_display_types.h"
@@ -4564,10 +4563,6 @@ void intel_runtime_pm_enable_interrupts(struct drm_i915_private *dev_priv)
bool intel_irqs_enabled(struct drm_i915_private *dev_priv)
{
- /*
- * We only use drm_irq_uninstall() at unload and VT switch, so
- * this is the only thing we need to check.
- */
return dev_priv->runtime_pm.irqs_enabled;
}
@@ -4575,3 +4570,8 @@ void intel_synchronize_irq(struct drm_i915_private *i915)
{
synchronize_irq(to_pci_dev(i915->drm.dev)->irq);
}
+ void intel_synchronize_hardirq(struct drm_i915_private *i915)
+ {
+ synchronize_hardirq(to_pci_dev(i915->drm.dev)->irq);
+ }


@@ -94,6 +94,7 @@ void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv);
void intel_runtime_pm_enable_interrupts(struct drm_i915_private *dev_priv);
bool intel_irqs_enabled(struct drm_i915_private *dev_priv);
void intel_synchronize_irq(struct drm_i915_private *i915);
+ void intel_synchronize_hardirq(struct drm_i915_private *i915);
int intel_get_crtc_scanline(struct intel_crtc *crtc);
void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv,


@@ -10513,7 +10513,6 @@ enum skl_power_gate {
#define _DG1_DPCLKA1_CFGCR0 0x16C280
#define _DG1_DPCLKA_PHY_IDX(phy) ((phy) % 2)
#define _DG1_DPCLKA_PLL_IDX(pll) ((pll) % 2)
- #define _DG1_PHY_DPLL_MAP(phy) ((phy) >= PHY_C ? DPLL_ID_DG1_DPLL2 : DPLL_ID_DG1_DPLL0)
#define DG1_DPCLKA_CFGCR0(phy) _MMIO_PHY((phy) / 2, \
_DG1_DPCLKA_CFGCR0, \
_DG1_DPCLKA1_CFGCR0)
@@ -10521,8 +10520,6 @@ enum skl_power_gate {
#define DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy) (_DG1_DPCLKA_PHY_IDX(phy) * 2)
#define DG1_DPCLKA_CFGCR0_DDI_CLK_SEL(pll, phy) (_DG1_DPCLKA_PLL_IDX(pll) << DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy))
#define DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy) (0x3 << DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy))
- #define DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_DPLL_MAP(clk_sel, phy) \
- (((clk_sel) >> DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy)) + _DG1_PHY_DPLL_MAP(phy))
/* ADLS Clocks */
#define _ADLS_DPCLKA_CFGCR0 0x164280


@@ -1325,6 +1325,7 @@ radeon_user_framebuffer_create(struct drm_device *dev,
/* Handle is imported dma-buf, so cannot be migrated to VRAM for scanout */
if (obj->import_attach) {
DRM_DEBUG_KMS("Cannot create framebuffer from imported dma_buf\n");
+ drm_gem_object_put(obj);
return ERR_PTR(-EINVAL);
}


@@ -374,13 +374,13 @@ radeon_pci_shutdown(struct pci_dev *pdev)
if (radeon_device_is_virtual())
radeon_pci_remove(pdev);
- #ifdef CONFIG_PPC64
+ #if defined(CONFIG_PPC64) || defined(CONFIG_MACH_LOONGSON64)
/*
 * Some adapters need to be suspended before a
 * shutdown occurs in order to prevent an error
- * during kexec.
- * Make this power specific becauase it breaks
- * some non-power boards.
+ * during kexec, shutdown or reboot.
+ * Make this power and Loongson specific because
+ * it breaks some other boards.
*/
radeon_suspend_kms(pci_get_drvdata(pdev), true, true, false);
#endif


@@ -49,23 +49,23 @@ static void radeon_bo_clear_surface_reg(struct radeon_bo *bo);
* function are calling it.
*/
- static void radeon_update_memory_usage(struct radeon_bo *bo,
- unsigned mem_type, int sign)
+ static void radeon_update_memory_usage(struct ttm_buffer_object *bo,
+ unsigned int mem_type, int sign)
{
- struct radeon_device *rdev = bo->rdev;
+ struct radeon_device *rdev = radeon_get_rdev(bo->bdev);
switch (mem_type) {
case TTM_PL_TT:
if (sign > 0)
- atomic64_add(bo->tbo.base.size, &rdev->gtt_usage);
+ atomic64_add(bo->base.size, &rdev->gtt_usage);
else
- atomic64_sub(bo->tbo.base.size, &rdev->gtt_usage);
+ atomic64_sub(bo->base.size, &rdev->gtt_usage);
break;
case TTM_PL_VRAM:
if (sign > 0)
- atomic64_add(bo->tbo.base.size, &rdev->vram_usage);
+ atomic64_add(bo->base.size, &rdev->vram_usage);
else
- atomic64_sub(bo->tbo.base.size, &rdev->vram_usage);
+ atomic64_sub(bo->base.size, &rdev->vram_usage);
break;
}
}
@@ -76,8 +76,6 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
bo = container_of(tbo, struct radeon_bo, tbo);
- radeon_update_memory_usage(bo, bo->tbo.resource->mem_type, -1);
mutex_lock(&bo->rdev->gem.mutex);
list_del_init(&bo->list);
mutex_unlock(&bo->rdev->gem.mutex);
@@ -727,24 +725,21 @@ int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
}
void radeon_bo_move_notify(struct ttm_buffer_object *bo,
- bool evict,
+ unsigned int old_type,
struct ttm_resource *new_mem)
{
struct radeon_bo *rbo;
+ radeon_update_memory_usage(bo, old_type, -1);
+ if (new_mem)
+ radeon_update_memory_usage(bo, new_mem->mem_type, 1);
if (!radeon_ttm_bo_is_radeon_bo(bo))
return;
rbo = container_of(bo, struct radeon_bo, tbo);
radeon_bo_check_tiling(rbo, 0, 1);
radeon_vm_bo_invalidate(rbo->rdev, rbo);
- /* update statistics */
- if (!new_mem)
- return;
- radeon_update_memory_usage(rbo, bo->resource->mem_type, -1);
- radeon_update_memory_usage(rbo, new_mem->mem_type, 1);
}
vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)


@@ -161,7 +161,7 @@ extern void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
extern int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
bool force_drop);
extern void radeon_bo_move_notify(struct ttm_buffer_object *bo,
- bool evict,
+ unsigned int old_type,
struct ttm_resource *new_mem);
extern vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
extern int radeon_bo_get_surface_reg(struct radeon_bo *bo);


@@ -199,7 +199,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
struct ttm_resource *old_mem = bo->resource;
struct radeon_device *rdev;
struct radeon_bo *rbo;
- int r;
+ int r, old_type;
if (new_mem->mem_type == TTM_PL_TT) {
r = radeon_ttm_tt_bind(bo->bdev, bo->ttm, new_mem);
@@ -216,6 +216,9 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
if (WARN_ON_ONCE(rbo->tbo.pin_count > 0))
return -EINVAL;
+ /* Save old type for statistics update */
+ old_type = old_mem->mem_type;
rdev = radeon_get_rdev(bo->bdev);
if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
ttm_bo_move_null(bo, new_mem);
@@ -261,7 +264,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
out:
/* update statistics */
atomic64_add(bo->base.size, &rdev->num_bytes_moved);
- radeon_bo_move_notify(bo, evict, new_mem);
+ radeon_bo_move_notify(bo, old_type, new_mem);
return 0;
}
@@ -682,7 +685,11 @@ bool radeon_ttm_tt_is_readonly(struct radeon_device *rdev,
static void
radeon_bo_delete_mem_notify(struct ttm_buffer_object *bo)
{
- radeon_bo_move_notify(bo, false, NULL);
+ unsigned int old_type = TTM_PL_SYSTEM;
+ if (bo->resource)
+ old_type = bo->resource->mem_type;
+ radeon_bo_move_notify(bo, old_type, NULL);
}
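radeon_bo_move_notify() now takes the pre-move placement explicitly because, by the time TTM invokes the notify callbacks, bo->resource may already describe the new placement, or be gone entirely at delete time, hence the TTM_PL_SYSTEM fallback above. The bookkeeping itself is a plain signed transfer between per-domain counters; in miniature (userspace sketch, non-atomic counters):

#include <stdio.h>

enum { PL_SYSTEM, PL_TT, PL_VRAM, PL_COUNT };
static long long usage[PL_COUNT];

/* Debit the old domain, credit the new one; new_type < 0 means destroyed. */
static void account_move(long long size, int old_type, int new_type)
{
	usage[old_type] -= size;
	if (new_type >= 0)
		usage[new_type] += size;
}

int main(void)
{
	usage[PL_SYSTEM] += 4096;               /* BO created in system RAM */
	account_move(4096, PL_SYSTEM, PL_VRAM); /* populated into VRAM */
	account_move(4096, PL_VRAM, PL_TT);     /* evicted to GTT */
	account_move(4096, PL_TT, -1);          /* freed: counters balance */
	printf("system=%lld tt=%lld vram=%lld\n",
	       usage[PL_SYSTEM], usage[PL_TT], usage[PL_VRAM]);
	return 0;
}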
static struct ttm_device_funcs radeon_bo_driver = {


@@ -212,7 +212,7 @@ static inline void dma_resv_unlock(struct dma_resv *obj)
}
/**
- * dma_resv_exclusive - return the object's exclusive fence
+ * dma_resv_excl_fence - return the object's exclusive fence
* @obj: the reservation object
*
* Returns the exclusive fence (if any). Caller must either hold the objects