
Commit 524a1e4

sean-jc authored and bonzini committed
KVM: x86/mmu: Don't leak non-leaf SPTEs when zapping all SPTEs
Pass "all ones" as the end GFN to signal "zap all" for the TDP MMU and really zap all SPTEs in this case. As is, zap_gfn_range() skips non-leaf SPTEs whose range exceeds the range to be zapped. If shadow_phys_bits is not aligned to the range size of top-level SPTEs, e.g. 512gb with 4-level paging, the "zap all" flows will skip top-level SPTEs whose range extends beyond shadow_phys_bits and leak their SPs when the VM is destroyed. Use the current upper bound (based on host.MAXPHYADDR) to detect that the caller wants to zap all SPTEs, e.g. instead of using the max theoretical gfn, 1 << (52 - 12). The more precise upper bound allows the TDP iterator to terminate its walk earlier when running on hosts with MAXPHYADDR < 52. Add a WARN on kmv->arch.tdp_mmu_pages when the TDP MMU is destroyed to help future debuggers should KVM decide to leak SPTEs again. The bug is most easily reproduced by running (and unloading!) KVM in a VM whose host.MAXPHYADDR < 39, as the SPTE for gfn=0 will be skipped. ============================================================================= BUG kvm_mmu_page_header (Not tainted): Objects remaining in kvm_mmu_page_header on __kmem_cache_shutdown() ----------------------------------------------------------------------------- Slab 0x000000004d8f7af1 objects=22 used=2 fp=0x00000000624d29ac flags=0x4000000000000200(slab|zone=1) CPU: 0 PID: 1582 Comm: rmmod Not tainted 5.14.0-rc2+ #420 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 Call Trace: dump_stack_lvl+0x45/0x59 slab_err+0x95/0xc9 __kmem_cache_shutdown.cold+0x3c/0x158 kmem_cache_destroy+0x3d/0xf0 kvm_mmu_module_exit+0xa/0x30 [kvm] kvm_arch_exit+0x5d/0x90 [kvm] kvm_exit+0x78/0x90 [kvm] vmx_exit+0x1a/0x50 [kvm_intel] __x64_sys_delete_module+0x13f/0x220 do_syscall_64+0x3b/0xc0 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: faaf05b ("kvm: x86/mmu: Support zapping SPTEs in the TDP MMU") Cc: [email protected] Cc: Ben Gardon <[email protected]> Signed-off-by: Sean Christopherson <[email protected]> Message-Id: <[email protected]> Signed-off-by: Paolo Bonzini <[email protected]>
1 parent d5aaad6 commit 524a1e4

File tree

1 file changed: +16 −10

arch/x86/kvm/mmu/tdp_mmu.c (+16 −10)

--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -43,6 +43,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
 	if (!kvm->arch.tdp_mmu_enabled)
 		return;
 
+	WARN_ON(!list_empty(&kvm->arch.tdp_mmu_pages));
 	WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
 
 	/*
@@ -81,8 +82,6 @@ static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head)
 void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
 			  bool shared)
 {
-	gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
-
 	kvm_lockdep_assert_mmu_lock_held(kvm, shared);
 
 	if (!refcount_dec_and_test(&root->tdp_mmu_root_count))
@@ -94,7 +93,7 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
 	list_del_rcu(&root->link);
 	spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
 
-	zap_gfn_range(kvm, root, 0, max_gfn, false, false, shared);
+	zap_gfn_range(kvm, root, 0, -1ull, false, false, shared);
 
 	call_rcu(&root->rcu_head, tdp_mmu_free_sp_rcu_callback);
 }
@@ -724,8 +723,17 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 			  gfn_t start, gfn_t end, bool can_yield, bool flush,
 			  bool shared)
 {
+	gfn_t max_gfn_host = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
+	bool zap_all = (start == 0 && end >= max_gfn_host);
 	struct tdp_iter iter;
 
+	/*
+	 * Bound the walk at host.MAXPHYADDR, guest accesses beyond that will
+	 * hit a #PF(RSVD) and never get to an EPT Violation/Misconfig / #NPF,
+	 * and so KVM will never install a SPTE for such addresses.
+	 */
+	end = min(end, max_gfn_host);
+
 	kvm_lockdep_assert_mmu_lock_held(kvm, shared);
 
 	rcu_read_lock();
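A minimal sketch (simplified types, hypothetical hard-coded constants, no
locking or iteration) of the detect-then-clamp dance added above, showing
that end = -1ull both sets zap_all and is bounded back down to
max_gfn_host before the walk starts:

	#include <stdbool.h>
	#include <stdio.h>

	typedef unsigned long long gfn_t;

	#define SHADOW_PHYS_BITS 38ULL   /* stand-in for the real shadow_phys_bits */
	#define PAGE_SHIFT       12ULL

	static void zap_range_model(gfn_t start, gfn_t end)
	{
		gfn_t max_gfn_host = 1ULL << (SHADOW_PHYS_BITS - PAGE_SHIFT);
		bool zap_all = (start == 0 && end >= max_gfn_host);

		/* Clamp the walk: no SPTE is ever installed above host.MAXPHYADDR. */
		if (end > max_gfn_host)
			end = max_gfn_host;

		printf("walk [0x%llx, 0x%llx), zap_all=%d\n", start, end, zap_all);
	}

	int main(void)
	{
		zap_range_model(0, -1ULL);       /* "zap all": clamped, zap_all == 1 */
		zap_range_model(0, 1ULL << 10);  /* partial zap: unchanged, zap_all == 0 */
		return 0;
	}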
@@ -744,9 +752,10 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 		/*
 		 * If this is a non-last-level SPTE that covers a larger range
 		 * than should be zapped, continue, and zap the mappings at a
-		 * lower level.
+		 * lower level, except when zapping all SPTEs.
 		 */
-		if ((iter.gfn < start ||
+		if (!zap_all &&
+		    (iter.gfn < start ||
 		     iter.gfn + KVM_PAGES_PER_HPAGE(iter.level) > end) &&
 		    !is_last_spte(iter.old_spte, iter.level))
			continue;
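To close the loop on the earlier sketch: the same gfn=0 scenario run
through the rewritten predicate. Again a toy model with made-up values
(not kernel code); skip mirrors the new condition above and now comes out
false, so the top-level SPTE is zapped rather than leaked:

	#include <stdbool.h>
	#include <stdio.h>

	int main(void)
	{
		const unsigned long long start = 0;
		const unsigned long long end   = 1ULL << 26;  /* clamped bound, MAXPHYADDR = 38 */
		const unsigned long long pages = 1ULL << 27;  /* reach of one top-level SPTE */
		const unsigned long long gfn   = 0;
		const bool is_last = false;                   /* non-leaf SPTE */
		const bool zap_all = true;                    /* caller passed end = -1ull */

		/* Mirror of the new condition: overreach no longer skips on zap-all. */
		bool skip = !zap_all &&
			    (gfn < start || gfn + pages > end) &&
			    !is_last;

		printf("skip=%d -> the top-level SPTE is zapped and its SP freed\n", skip);
		return 0;
	}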
@@ -794,12 +803,11 @@ bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id, gfn_t start,
 
 void kvm_tdp_mmu_zap_all(struct kvm *kvm)
 {
-	gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
 	bool flush = false;
 	int i;
 
 	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
-		flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, max_gfn,
+		flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, -1ull,
 						  flush, false);
 
 	if (flush)
@@ -838,7 +846,6 @@ static struct kvm_mmu_page *next_invalidated_root(struct kvm *kvm,
  */
 void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
 {
-	gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
 	struct kvm_mmu_page *next_root;
 	struct kvm_mmu_page *root;
 	bool flush = false;
@@ -854,8 +861,7 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
 
 	rcu_read_unlock();
 
-	flush = zap_gfn_range(kvm, root, 0, max_gfn, true, flush,
-			      true);
+	flush = zap_gfn_range(kvm, root, 0, -1ull, true, flush, true);
 
 	/*
 	 * Put the reference acquired in
