From deae4a10f16649d9c8bfb89f38b61930fb938284 Mon Sep 17 00:00:00 2001
From: David Stevens
Date: Wed, 22 Sep 2021 13:58:59 +0900
Subject: [PATCH] KVM: x86: only allocate gfn_track when necessary

Avoid allocating the gfn_track arrays if nothing needs them. If there
are no users of the API external to KVM (i.e. no GVT-g), then page
tracking is only needed for shadow page tables. This means that when
TDP is enabled and there are no external users, the gfn_track arrays
can be lazily allocated when the shadow MMU is actually used.

This avoids allocations equal to 0.05% of guest memory when nested
virtualization is not used, if the kernel is compiled without GVT-g.

Signed-off-by: David Stevens
Message-Id: <20210922045859.2011227-3-stevensd@google.com>
Signed-off-by: Paolo Bonzini
---
 arch/x86/include/asm/kvm_host.h       |  8 +++
 arch/x86/include/asm/kvm_page_track.h |  8 ++-
 arch/x86/kvm/mmu/mmu.c                | 11 +++-
 arch/x86/kvm/mmu/page_track.c         | 72 +++++++++++++++++++++++++--
 arch/x86/kvm/x86.c                    |  2 +-
 5 files changed, 93 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1b280292bdff..5271fce6cd65 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1211,6 +1211,14 @@ struct kvm_arch {
 	 */
 	bool memslots_have_rmaps;
 
+	/*
+	 * Set when the KVM mmu needs guest write access page tracking. If
+	 * set, the necessary gfn_track arrays have been allocated for
+	 * all memslots and should be allocated for any newly created or
+	 * modified memslots.
+	 */
+	bool memslots_mmu_write_tracking;
+
 #if IS_ENABLED(CONFIG_HYPERV)
 	hpa_t	hv_root_tdp;
 	spinlock_t hv_root_tdp_lock;
diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h
index 5c12f97ce934..79d84a94f8eb 100644
--- a/arch/x86/include/asm/kvm_page_track.h
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -49,8 +49,11 @@ struct kvm_page_track_notifier_node {
 int kvm_page_track_init(struct kvm *kvm);
 void kvm_page_track_cleanup(struct kvm *kvm);
 
+int kvm_page_track_enable_mmu_write_tracking(struct kvm *kvm);
+
 void kvm_page_track_free_memslot(struct kvm_memory_slot *slot);
-int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
+int kvm_page_track_create_memslot(struct kvm *kvm,
+				  struct kvm_memory_slot *slot,
 				  unsigned long npages);
 
 void kvm_slot_page_track_add_page(struct kvm *kvm,
@@ -59,7 +62,8 @@ void kvm_slot_page_track_add_page(struct kvm *kvm,
 void kvm_slot_page_track_remove_page(struct kvm *kvm,
 				     struct kvm_memory_slot *slot, gfn_t gfn,
 				     enum kvm_page_track_mode mode);
-bool kvm_slot_page_track_is_active(struct kvm_memory_slot *slot, gfn_t gfn,
+bool kvm_slot_page_track_is_active(struct kvm_vcpu *vcpu,
+				   struct kvm_memory_slot *slot, gfn_t gfn,
 				   enum kvm_page_track_mode mode);
 
 void
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 91292009780a..24a9f4c3f5e7 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2583,7 +2583,7 @@ int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
 	 * track machinery is used to write-protect upper-level shadow pages,
 	 * i.e. this guards the role.level == 4K assertion below!
 	 */
-	if (kvm_slot_page_track_is_active(slot, gfn, KVM_PAGE_TRACK_WRITE))
+	if (kvm_slot_page_track_is_active(vcpu, slot, gfn, KVM_PAGE_TRACK_WRITE))
 		return -EPERM;
 
 	/*
@@ -3431,6 +3431,10 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	if (r)
 		return r;
 
+	r = kvm_page_track_enable_mmu_write_tracking(vcpu->kvm);
+	if (r)
+		return r;
+
 	write_lock(&vcpu->kvm->mmu_lock);
 	r = make_mmu_pages_available(vcpu);
 	if (r < 0)
@@ -3790,7 +3794,7 @@ static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu,
 	 * guest is writing the page which is write tracked which can
 	 * not be fixed by page fault handler.
 	 */
-	if (kvm_slot_page_track_is_active(fault->slot, fault->gfn, KVM_PAGE_TRACK_WRITE))
+	if (kvm_slot_page_track_is_active(vcpu, fault->slot, fault->gfn, KVM_PAGE_TRACK_WRITE))
 		return true;
 
 	return false;
@@ -5607,6 +5611,9 @@ void kvm_mmu_init_vm(struct kvm *kvm)
 	 */
 	kvm->arch.memslots_have_rmaps = true;
 
+	if (!tdp_enabled)
+		kvm->arch.memslots_mmu_write_tracking = true;
+
 	node->track_write = kvm_mmu_pte_write;
 	node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
 	kvm_page_track_register_notifier(kvm, node);
diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c
index 16e7176c97a5..bb5d60bd4dbf 100644
--- a/arch/x86/kvm/mmu/page_track.c
+++ b/arch/x86/kvm/mmu/page_track.c
@@ -19,6 +19,16 @@
 #include "mmu.h"
 #include "mmu_internal.h"
 
+static bool write_tracking_enabled(struct kvm *kvm)
+{
+	/*
+	 * Read memslots_mmu_write_tracking before gfn_track pointers. Pairs
+	 * with smp_store_release in kvm_page_track_enable_mmu_write_tracking.
+	 */
+	return IS_ENABLED(CONFIG_KVM_EXTERNAL_WRITE_TRACKING) ||
+	       smp_load_acquire(&kvm->arch.memslots_mmu_write_tracking);
+}
+
 void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
 {
 	int i;
@@ -29,12 +39,16 @@ void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
 	}
 }
 
-int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
+int kvm_page_track_create_memslot(struct kvm *kvm,
+				  struct kvm_memory_slot *slot,
 				  unsigned long npages)
 {
-	int  i;
+	int i;
 
 	for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
+		if (i == KVM_PAGE_TRACK_WRITE && !write_tracking_enabled(kvm))
+			continue;
+
 		slot->arch.gfn_track[i] =
 			kvcalloc(npages, sizeof(*slot->arch.gfn_track[i]),
 				 GFP_KERNEL_ACCOUNT);
@@ -57,6 +71,46 @@ static inline bool page_track_mode_is_valid(enum kvm_page_track_mode mode)
 	return true;
 }
 
+int kvm_page_track_enable_mmu_write_tracking(struct kvm *kvm)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *slot;
+	unsigned short **gfn_track;
+	int i;
+
+	if (write_tracking_enabled(kvm))
+		return 0;
+
+	mutex_lock(&kvm->slots_arch_lock);
+
+	if (write_tracking_enabled(kvm)) {
+		mutex_unlock(&kvm->slots_arch_lock);
+		return 0;
+	}
+
+	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+		slots = __kvm_memslots(kvm, i);
+		kvm_for_each_memslot(slot, slots) {
+			gfn_track = slot->arch.gfn_track + KVM_PAGE_TRACK_WRITE;
+			*gfn_track = kvcalloc(slot->npages, sizeof(*gfn_track),
+					      GFP_KERNEL_ACCOUNT);
+			if (*gfn_track == NULL) {
+				mutex_unlock(&kvm->slots_arch_lock);
+				return -ENOMEM;
+			}
+		}
+	}
+
+	/*
+	 * Ensure that memslots_mmu_write_tracking becomes true strictly
+	 * after all the pointers are set.
+	 */
+	smp_store_release(&kvm->arch.memslots_mmu_write_tracking, true);
+	mutex_unlock(&kvm->slots_arch_lock);
+
+	return 0;
+}
+
 static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn,
 			     enum kvm_page_track_mode mode, short count)
 {
@@ -92,6 +146,10 @@ void kvm_slot_page_track_add_page(struct kvm *kvm,
 	if (WARN_ON(!page_track_mode_is_valid(mode)))
 		return;
 
+	if (WARN_ON(mode == KVM_PAGE_TRACK_WRITE &&
+		    !write_tracking_enabled(kvm)))
+		return;
+
 	update_gfn_track(slot, gfn, mode, 1);
 
 	/*
@@ -126,6 +184,10 @@ void kvm_slot_page_track_remove_page(struct kvm *kvm,
 	if (WARN_ON(!page_track_mode_is_valid(mode)))
 		return;
 
+	if (WARN_ON(mode == KVM_PAGE_TRACK_WRITE &&
+		    !write_tracking_enabled(kvm)))
+		return;
+
 	update_gfn_track(slot, gfn, mode, -1);
 
 	/*
@@ -139,7 +201,8 @@ EXPORT_SYMBOL_GPL(kvm_slot_page_track_remove_page);
 /*
  * check if the corresponding access on the specified guest page is tracked.
  */
-bool kvm_slot_page_track_is_active(struct kvm_memory_slot *slot, gfn_t gfn,
+bool kvm_slot_page_track_is_active(struct kvm_vcpu *vcpu,
+				   struct kvm_memory_slot *slot, gfn_t gfn,
 				   enum kvm_page_track_mode mode)
 {
 	int index;
@@ -150,6 +213,9 @@ bool kvm_slot_page_track_is_active(struct kvm_memory_slot *slot, gfn_t gfn,
 	if (!slot)
 		return false;
 
+	if (mode == KVM_PAGE_TRACK_WRITE && !write_tracking_enabled(vcpu->kvm))
+		return false;
+
 	index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
 	return !!READ_ONCE(slot->arch.gfn_track[mode][index]);
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 03091a2e0822..db7fa1398f0d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11476,7 +11476,7 @@ static int kvm_alloc_memslot_metadata(struct kvm *kvm,
 		}
 	}
 
-	if (kvm_page_track_create_memslot(slot, npages))
+	if (kvm_page_track_create_memslot(kvm, slot, npages))
 		goto out_free;
 
 	return 0;
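
Notes (commentary on the patch, not part of the diff to be applied):

1. Where the 0.05% figure in the commit message comes from: write
tracking keeps one unsigned short, i.e. 2 bytes, per 4 KiB guest page
(the gfn_track arrays are indexed at PG_LEVEL_4K granularity), and
2 / 4096 is roughly 0.05% of guest memory.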
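
2. The lockless readers are safe because of the acquire/release
pairing: a reader that observes memslots_mmu_write_tracking == true
via smp_load_acquire() is guaranteed to also observe the gfn_track
pointers stored before the smp_store_release(). Below is a minimal
userspace sketch of the reader side, with C11 atomics standing in for
the kernel primitives; the names (tracking_enabled, track_array,
page_is_write_tracked) are illustrative, not KVM's.

	/* Reader side of the publication pattern - illustrative only. */
	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stddef.h>

	/* Defined and published by the enable sketch in note 3. */
	extern atomic_bool tracking_enabled;
	extern unsigned short *track_array;

	bool page_is_write_tracked(size_t index)
	{
		/*
		 * The acquire load pairs with the writer's release store:
		 * seeing tracking_enabled == true guarantees track_array
		 * and its contents are visible, so the dereference below
		 * is safe.
		 */
		if (!atomic_load_explicit(&tracking_enabled,
					  memory_order_acquire))
			return false;

		return track_array[index] != 0;
	}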
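
3. kvm_page_track_enable_mmu_write_tracking() pairs that publication
with double-checked locking: an unlocked fast path, a re-check under
slots_arch_lock so a losing racer backs off without allocating twice,
then the allocations and the release store. A hedged sketch of the same
protocol in userspace C, with a pthread mutex standing in for
slots_arch_lock and, again, illustrative names only:

	/* Writer side of the publication pattern - illustrative only. */
	#include <errno.h>
	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdlib.h>

	static pthread_mutex_t slots_lock = PTHREAD_MUTEX_INITIALIZER;

	atomic_bool tracking_enabled;	/* read by lockless readers */
	unsigned short *track_array;	/* one counter per guest page */

	int enable_write_tracking(size_t npages)
	{
		/* Unlocked fast path: already enabled, nothing to do. */
		if (atomic_load_explicit(&tracking_enabled,
					 memory_order_acquire))
			return 0;

		pthread_mutex_lock(&slots_lock);

		/*
		 * Re-check under the lock: a concurrent caller may have
		 * enabled tracking between the check above and lock
		 * acquisition.
		 */
		if (atomic_load_explicit(&tracking_enabled,
					 memory_order_acquire)) {
			pthread_mutex_unlock(&slots_lock);
			return 0;
		}

		track_array = calloc(npages, sizeof(*track_array));
		if (!track_array) {
			pthread_mutex_unlock(&slots_lock);
			return -ENOMEM;
		}

		/*
		 * Release store: the array pointer and its zeroed contents
		 * become visible before any reader can see
		 * tracking_enabled == true.
		 */
		atomic_store_explicit(&tracking_enabled, true,
				      memory_order_release);
		pthread_mutex_unlock(&slots_lock);
		return 0;
	}

As in the patch, a failed allocation leaves the flag unset and returns
-ENOMEM without rolling back per-slot arrays already allocated; in the
kernel those are freed later with the memslots themselves.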