Commit 203b7b1
KVM: x86/mmu: Don't skip non-leaf SPTEs when zapping all SPTEs
Use a magic number for the end gfn to signal "zap all" for the TDP MMU
and really zap all SPTEs in this case. As is, zap_gfn_range() skips
non-leaf SPTEs whose range exceeds the range to be zapped. If
shadow_phys_bits is not aligned to the range size of top-level SPTEs,
e.g. 512gb with 4-level paging, the "zap all" flows will skip top-level
SPTEs whose range extends beyond shadow_phys_bits and leak their SPs
when the VM is destroyed.

Use a magic number and the current upper bound (based on host.MAXPHYADDR)
instead of the max theoretical gfn, 1 << (52 - 12), even though the
latter would be functionally ok, too. Bounding based on host.MAXPHYADDR
allows the TDP iterator to terminate its walk when the gfn of the
iterator is out of bounds.

Add a WARN on kvm->arch.tdp_mmu_pages when the TDP MMU is destroyed to
help future debuggers should KVM decide to leak SPTEs again.

The bug is most easily reproduced by running (and unloading!) KVM in a
VM whose host.MAXPHYADDR < 39, as the SPTE for gfn=0 will be skipped.

  =============================================================================
  BUG kvm_mmu_page_header (Not tainted): Objects remaining in kvm_mmu_page_header on __kmem_cache_shutdown()
  -----------------------------------------------------------------------------
  Slab 0x000000004d8f7af1 objects=22 used=2 fp=0x00000000624d29ac flags=0x4000000000000200(slab|zone=1)
  CPU: 0 PID: 1582 Comm: rmmod Not tainted 5.14.0-rc2+ #420
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
  Call Trace:
   dump_stack_lvl+0x45/0x59
   slab_err+0x95/0xc9
   __kmem_cache_shutdown.cold+0x3c/0x158
   kmem_cache_destroy+0x3d/0xf0
   kvm_mmu_module_exit+0xa/0x30 [kvm]
   kvm_arch_exit+0x5d/0x90 [kvm]
   kvm_exit+0x78/0x90 [kvm]
   vmx_exit+0x1a/0x50 [kvm_intel]
   __x64_sys_delete_module+0x13f/0x220
   do_syscall_64+0x3b/0xc0
   entry_SYSCALL_64_after_hwframe+0x44/0xae

Fixes: faaf05b ("kvm: x86/mmu: Support zapping SPTEs in the TDP MMU")
Cc: [email protected]
Cc: Ben Gardon <[email protected]>
Signed-off-by: Sean Christopherson <[email protected]>
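Editorial note: the arithmetic behind the leak can be checked outside the kernel. Below is a minimal userspace sketch (illustrative only, not kernel code; the constants and variable names are assumptions chosen to mirror 4-level paging) showing that with host.MAXPHYADDR = 36 the pre-fix skip condition in zap_gfn_range() bypasses the non-leaf top-level SPTE covering gfn 0, because that entry spans 2^27 pages while the old "zap all" end bound is only 2^24 pages:

  /*
   * sketch.c - illustrative only, not kernel code.  Shows why the old skip
   * condition in zap_gfn_range() leaves the top-level SPTE for gfn 0 in
   * place when host.MAXPHYADDR < 39 (here: 36).
   */
  #include <stdbool.h>
  #include <stdio.h>

  int main(void)
  {
          const int page_shift = 12;              /* 4KiB pages */
          const int shadow_phys_bits = 36;        /* host.MAXPHYADDR < 39 */

          unsigned long long start = 0;
          unsigned long long end = 1ULL << (shadow_phys_bits - page_shift);
          unsigned long long iter_gfn = 0;        /* gfn of the 512gb SPTE */
          unsigned long long pages_per_hpage = 1ULL << (39 - page_shift);
          bool is_leaf = false;                   /* top-level SPTE is non-leaf */

          /* The pre-fix check: skip non-leaf SPTEs whose range exceeds [start, end). */
          bool skipped = (iter_gfn < start ||
                          iter_gfn + pages_per_hpage > end) && !is_leaf;

          printf("end = %#llx, pages per top-level SPTE = %#llx\n",
                 end, pages_per_hpage);
          printf("top-level SPTE for gfn 0 is %s\n",
                 skipped ? "skipped -> its SP leaks at VM teardown" : "zapped");
          return 0;
  }

With the fix, passing ZAP_ALL makes end == ZAP_ALL_END, so zap_all is true, the skip is bypassed, and the walk descends to and zaps every SPTE.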
1 parent 03fde7d commit 203b7b1

File tree

1 file changed, +26 -10 lines changed


arch/x86/kvm/mmu/tdp_mmu.c

+26 -10

@@ -13,6 +13,17 @@
 static bool __read_mostly tdp_mmu_enabled = true;
 module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0644);
 
+/*
+ * Magic numbers for 'start' and 'end' used when zapping all SPTEs. The values
+ * are not completely arbitrary; start must be 0, and end must be greater than
+ * the max theoretical GFN that KVM can fault into the TDP MMU.
+ */
+#define ZAP_ALL_START 0
+#define ZAP_ALL_END -1ull
+
+/* start+end pair for zapping SPTEs for all possible GFNs. */
+#define ZAP_ALL ZAP_ALL_START, ZAP_ALL_END
+
 /* Initializes the TDP MMU for the VM, if enabled. */
 bool kvm_mmu_init_tdp_mmu(struct kvm *kvm)
 {
@@ -43,6 +54,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
         if (!kvm->arch.tdp_mmu_enabled)
                 return;
 
+        WARN_ON(!list_empty(&kvm->arch.tdp_mmu_pages));
         WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
 
         /*
@@ -81,8 +93,6 @@ static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head)
 void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
                           bool shared)
 {
-        gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
-
         kvm_lockdep_assert_mmu_lock_held(kvm, shared);
 
         if (!refcount_dec_and_test(&root->tdp_mmu_root_count))
@@ -94,7 +104,7 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
         list_del_rcu(&root->link);
         spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
 
-        zap_gfn_range(kvm, root, 0, max_gfn, false, false, shared);
+        zap_gfn_range(kvm, root, ZAP_ALL, false, false, shared);
 
         call_rcu(&root->rcu_head, tdp_mmu_free_sp_rcu_callback);
 }
@@ -739,8 +749,16 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
                           gfn_t start, gfn_t end, bool can_yield, bool flush,
                           bool shared)
 {
+        bool zap_all = (end == ZAP_ALL_END);
         struct tdp_iter iter;
 
+        /*
+         * Bound the walk at host.MAXPHYADDR, guest accesses beyond that will
+         * hit a #PF(RSVD) and never get to an EPT Violation/Misconfig / #NPF,
+         * and so KVM will never install a SPTE for such addresses.
+         */
+        end = min(end, 1ULL << (shadow_phys_bits - PAGE_SHIFT));
+
         kvm_lockdep_assert_mmu_lock_held(kvm, shared);
 
         rcu_read_lock();
@@ -759,9 +777,10 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
                 /*
                  * If this is a non-last-level SPTE that covers a larger range
                  * than should be zapped, continue, and zap the mappings at a
-                 * lower level.
+                 * lower level, except when zapping all SPTEs.
                  */
-                if ((iter.gfn < start ||
+                if (!zap_all &&
+                    (iter.gfn < start ||
                      iter.gfn + KVM_PAGES_PER_HPAGE(iter.level) > end) &&
                     !is_last_spte(iter.old_spte, iter.level))
                         continue;
@@ -803,12 +822,11 @@ bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id, gfn_t start,
 
 void kvm_tdp_mmu_zap_all(struct kvm *kvm)
 {
-        gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
         bool flush = false;
         int i;
 
         for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
-                flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, max_gfn, flush);
+                flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, ZAP_ALL, flush);
 
         if (flush)
                 kvm_flush_remote_tlbs(kvm);
@@ -846,7 +864,6 @@ static struct kvm_mmu_page *next_invalidated_root(struct kvm *kvm,
  */
 void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
 {
-        gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
         struct kvm_mmu_page *next_root;
         struct kvm_mmu_page *root;
         bool flush = false;
@@ -862,8 +879,7 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
 
                 rcu_read_unlock();
 
-                flush = zap_gfn_range(kvm, root, 0, max_gfn, true, flush,
-                                      true);
+                flush = zap_gfn_range(kvm, root, ZAP_ALL, true, flush, true);
 
                 /*
                  * Put the reference acquired in
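Usage note: ZAP_ALL works because the preprocessor expands the macro into two comma-separated arguments before the call is compiled, i.e. zap_gfn_range(kvm, root, ZAP_ALL, false, false, shared) becomes zap_gfn_range(kvm, root, ZAP_ALL_START, ZAP_ALL_END, false, false, shared). A standalone sketch of the same pattern (the names below are illustrative, not taken from tdp_mmu.c):

  #include <stdio.h>

  /* Same shape as the commit's ZAP_ALL: a pair macro expanding to two arguments. */
  #define RANGE_ALL_START 0ULL
  #define RANGE_ALL_END   ~0ULL
  #define RANGE_ALL       RANGE_ALL_START, RANGE_ALL_END

  static void zap_range(unsigned long long start, unsigned long long end)
  {
          printf("zapping gfns [%#llx, %#llx)\n", start, end);
  }

  int main(void)
  {
          zap_range(RANGE_ALL);   /* expands to zap_range(0ULL, ~0ULL) */
          return 0;
  }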
