Commit 997bd70

mm/memory_hotplug: shrink zones when offlining memory
commit feee6b2 upstream.

-- snip --

- Missing arm64 hot(un)plug support
- Missing some vmem_altmap_offset() cleanups
- Missing sub-section hotadd support
- Missing unification of mm/hmm.c and kernel/memremap.c

-- snip --

We currently try to shrink a single zone when removing memory. We use
the zone of the first page of the memory we are removing. If that
memmap was never initialized (e.g., memory was never onlined), we will
read garbage and can trigger kernel BUGs (due to a stale pointer):

    BUG: unable to handle page fault for address: 000000000000353d
    #PF: supervisor write access in kernel mode
    #PF: error_code(0x0002) - not-present page
    PGD 0 P4D 0
    Oops: 0002 [#1] SMP PTI
    CPU: 1 PID: 7 Comm: kworker/u8:0 Not tainted 5.3.0-rc5-next-20190820+ torvalds#317
    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.4
    Workqueue: kacpi_hotplug acpi_hotplug_work_fn
    RIP: 0010:clear_zone_contiguous+0x5/0x10
    Code: 48 89 c6 48 89 c3 e8 2a fe ff ff 48 85 c0 75 cf 5b 5d c3 c6 85 fd 05 00 00 01 5b 5d c3 0f 1f 840
    RSP: 0018:ffffad2400043c98 EFLAGS: 00010246
    RAX: 0000000000000000 RBX: 0000000200000000 RCX: 0000000000000000
    RDX: 0000000000200000 RSI: 0000000000140000 RDI: 0000000000002f40
    RBP: 0000000140000000 R08: 0000000000000000 R09: 0000000000000001
    R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000140000
    R13: 0000000000140000 R14: 0000000000002f40 R15: ffff9e3e7aff3680
    FS:  0000000000000000(0000) GS:ffff9e3e7bb00000(0000) knlGS:0000000000000000
    CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    CR2: 000000000000353d CR3: 0000000058610000 CR4: 00000000000006e0
    DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
    DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
    Call Trace:
     __remove_pages+0x4b/0x640
     arch_remove_memory+0x63/0x8d
     try_remove_memory+0xdb/0x130
     __remove_memory+0xa/0x11
     acpi_memory_device_remove+0x70/0x100
     acpi_bus_trim+0x55/0x90
     acpi_device_hotplug+0x227/0x3a0
     acpi_hotplug_work_fn+0x1a/0x30
     process_one_work+0x221/0x550
     worker_thread+0x50/0x3b0
     kthread+0x105/0x140
     ret_from_fork+0x3a/0x50
    Modules linked in:
    CR2: 000000000000353d

Instead, shrink the zones when offlining memory or when onlining
failed. Introduce and use remove_pfn_range_from_zone() for that. We now
properly shrink the zones, even if we have DIMMs whereby

- Some memory blocks fall into no zone (never onlined)
- Some memory blocks fall into multiple zones (offlined + re-onlined)
- Multiple memory blocks fall into different zones

Drop the zone parameter (with a potentially dubious value) from
__remove_pages() and __remove_section().

Link: http://lkml.kernel.org/r/[email protected]
Fixes: f1dd2cd ("mm, memory_hotplug: do not associate hotadded memory to zones until online") [visible after d0dc12e]
Signed-off-by: David Hildenbrand <[email protected]>
Reviewed-by: Oscar Salvador <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: "Matthew Wilcox (Oracle)" <[email protected]>
Cc: "Aneesh Kumar K.V" <[email protected]>
Cc: Pavel Tatashin <[email protected]>
Cc: Greg Kroah-Hartman <[email protected]>
Cc: Dan Williams <[email protected]>
Cc: Logan Gunthorpe <[email protected]>
Cc: <[email protected]> [5.0+]
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
Signed-off-by: David Hildenbrand <[email protected]>
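Every per-arch hunk below makes the same change. As a minimal before/after sketch of the call pattern only (bodies abridged; arch-specific teardown such as vmem_remove_mapping() or kernel_physical_mapping_remove() is omitted):

/* Before this patch: the zone is derived from the first page of the
 * range. If that memmap was never initialized (memory never onlined),
 * page_zone() follows a stale pointer and the shrinking code crashes. */
void arch_remove_memory(int nid, u64 start, u64 size,
                        struct vmem_altmap *altmap)
{
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
        struct zone *zone;

        zone = page_zone(pfn_to_page(start_pfn));   /* may read garbage */
        __remove_pages(zone, start_pfn, nr_pages, altmap);
}

/* After this patch: no zone lookup at removal time. Zones are shrunk
 * earlier, via remove_pfn_range_from_zone(), when memory is offlined or
 * when onlining fails, i.e. while the memmap is known to be valid. */
void arch_remove_memory(int nid, u64 start, u64 size,
                        struct vmem_altmap *altmap)
{
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;

        __remove_pages(start_pfn, nr_pages, altmap);
}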
1 parent 52d9d45 commit 997bd70

10 files changed: +27 -51 lines changed

arch/ia64/mm/init.c
+1 -3

@@ -666,9 +666,7 @@ void arch_remove_memory(int nid, u64 start, u64 size,
 {
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
-       struct zone *zone;
 
-       zone = page_zone(pfn_to_page(start_pfn));
-       __remove_pages(zone, start_pfn, nr_pages, altmap);
+       __remove_pages(start_pfn, nr_pages, altmap);
 }
 #endif

arch/powerpc/mm/mem.c
+1 -10

@@ -144,18 +144,9 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
 {
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
-       struct page *page;
        int ret;
 
-       /*
-        * If we have an altmap then we need to skip over any reserved PFNs
-        * when querying the zone.
-        */
-       page = pfn_to_page(start_pfn);
-       if (altmap)
-               page += vmem_altmap_offset(altmap);
-
-       __remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
+       __remove_pages(start_pfn, nr_pages, altmap);
 
        /* Remove htab bolted mappings for this section of memory */
        start = (unsigned long)__va(start);

arch/s390/mm/init.c
+1 -3

@@ -244,10 +244,8 @@ void arch_remove_memory(int nid, u64 start, u64 size,
 {
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
-       struct zone *zone;
 
-       zone = page_zone(pfn_to_page(start_pfn));
-       __remove_pages(zone, start_pfn, nr_pages, altmap);
+       __remove_pages(start_pfn, nr_pages, altmap);
        vmem_remove_mapping(start, size);
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */

arch/sh/mm/init.c
+1 -3

@@ -448,9 +448,7 @@ void arch_remove_memory(int nid, u64 start, u64 size,
 {
        unsigned long start_pfn = PFN_DOWN(start);
        unsigned long nr_pages = size >> PAGE_SHIFT;
-       struct zone *zone;
 
-       zone = page_zone(pfn_to_page(start_pfn));
-       __remove_pages(zone, start_pfn, nr_pages, altmap);
+       __remove_pages(start_pfn, nr_pages, altmap);
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */

arch/x86/mm/init_32.c
+1 -3

@@ -865,10 +865,8 @@ void arch_remove_memory(int nid, u64 start, u64 size,
 {
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
-       struct zone *zone;
 
-       zone = page_zone(pfn_to_page(start_pfn));
-       __remove_pages(zone, start_pfn, nr_pages, altmap);
+       __remove_pages(start_pfn, nr_pages, altmap);
 }
 #endif
 

arch/x86/mm/init_64.c
+1 -7

@@ -1146,14 +1146,8 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
 {
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
-       struct page *page = pfn_to_page(start_pfn);
-       struct zone *zone;
 
-       /* With altmap the first mapped page is offset from @start */
-       if (altmap)
-               page += vmem_altmap_offset(altmap);
-       zone = page_zone(page);
-       __remove_pages(zone, start_pfn, nr_pages, altmap);
+       __remove_pages(start_pfn, nr_pages, altmap);
        kernel_physical_mapping_remove(start, start + size);
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */

include/linux/memory_hotplug.h
+5 -2

@@ -110,8 +110,8 @@ static inline bool movable_node_is_enabled(void)
 
 extern void arch_remove_memory(int nid, u64 start, u64 size,
                               struct vmem_altmap *altmap);
-extern void __remove_pages(struct zone *zone, unsigned long start_pfn,
-                          unsigned long nr_pages, struct vmem_altmap *altmap);
+extern void __remove_pages(unsigned long start_pfn, unsigned long nr_pages,
+                          struct vmem_altmap *altmap);
 
 /* reasonably generic interface to expand the physical pages */
 extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
@@ -331,6 +331,9 @@ extern int arch_add_memory(int nid, u64 start, u64 size,
                struct vmem_altmap *altmap, bool want_memblock);
 extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
                unsigned long nr_pages, struct vmem_altmap *altmap);
+extern void remove_pfn_range_from_zone(struct zone *zone,
+                                      unsigned long start_pfn,
+                                      unsigned long nr_pages);
 extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
 extern bool is_memblock_offlined(struct memory_block *mem);
 extern int sparse_add_one_section(int nid, unsigned long start_pfn,
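For orientation only, a hedged sketch of how the new declaration is meant to be used; the wrapper and its name below are illustrative and not part of the patch — the actual call sites added here are the online_pages() failure path and __offline_pages() (see the mm/memory_hotplug.c hunks further down):

#include <linux/memory_hotplug.h>

/*
 * Illustrative sketch: remove_pfn_range_from_zone() acts, in effect, as the
 * counterpart of move_pfn_range_to_zone(). A PFN range that was associated
 * with a zone while onlining is taken back out of the zone/node spans when
 * offlining succeeds or when onlining has to be rolled back, while the
 * memmap for that range is still known to be initialized.
 */
static void example_rollback_online(struct zone *zone, unsigned long pfn,
                                    unsigned long nr_pages)
{
        /* undo the span growth done earlier by move_pfn_range_to_zone() */
        remove_pfn_range_from_zone(zone, pfn, nr_pages);
}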

kernel/memremap.c
+1 -2

@@ -141,8 +141,7 @@ static void devm_memremap_pages_release(void *data)
        mem_hotplug_begin();
        if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
                pfn = align_start >> PAGE_SHIFT;
-               __remove_pages(page_zone(first_page), pfn,
-                              align_size >> PAGE_SHIFT, NULL);
+               __remove_pages(pfn, align_size >> PAGE_SHIFT, NULL);
        } else {
                arch_remove_memory(nid, align_start, align_size,
                                pgmap->altmap_valid ? &pgmap->altmap : NULL);

mm/hmm.c
+1 -3

@@ -997,7 +997,6 @@ static void hmm_devmem_release(void *data)
        struct hmm_devmem *devmem = data;
        struct resource *resource = devmem->resource;
        unsigned long start_pfn, npages;
-       struct zone *zone;
        struct page *page;
        int nid;
 
@@ -1006,12 +1005,11 @@ static void hmm_devmem_release(void *data)
        npages = ALIGN(resource_size(resource), PA_SECTION_SIZE) >> PAGE_SHIFT;
 
        page = pfn_to_page(start_pfn);
-       zone = page_zone(page);
        nid = page_to_nid(page);
 
        mem_hotplug_begin();
        if (resource->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY)
-               __remove_pages(zone, start_pfn, npages, NULL);
+               __remove_pages(start_pfn, npages, NULL);
        else
                arch_remove_memory(nid, start_pfn << PAGE_SHIFT,
                                   npages << PAGE_SHIFT, NULL);

mm/memory_hotplug.c
+14 -15

@@ -449,10 +449,11 @@ static void update_pgdat_span(struct pglist_data *pgdat)
        pgdat->node_spanned_pages = node_end_pfn - node_start_pfn;
 }
 
-static void __remove_zone(struct zone *zone, unsigned long start_pfn)
+void __ref remove_pfn_range_from_zone(struct zone *zone,
+                                     unsigned long start_pfn,
+                                     unsigned long nr_pages)
 {
        struct pglist_data *pgdat = zone->zone_pgdat;
-       int nr_pages = PAGES_PER_SECTION;
        unsigned long flags;
 
 #ifdef CONFIG_ZONE_DEVICE
@@ -465,14 +466,17 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn)
                return;
 #endif
 
+       clear_zone_contiguous(zone);
+
        pgdat_resize_lock(zone->zone_pgdat, &flags);
        shrink_zone_span(zone, start_pfn, start_pfn + nr_pages);
        update_pgdat_span(pgdat);
        pgdat_resize_unlock(zone->zone_pgdat, &flags);
+
+       set_zone_contiguous(zone);
 }
 
-static void __remove_section(struct zone *zone, struct mem_section *ms,
-               unsigned long map_offset,
+static void __remove_section(struct mem_section *ms, unsigned long map_offset,
                struct vmem_altmap *altmap)
 {
        unsigned long start_pfn;
@@ -483,14 +487,12 @@ static void __remove_section(struct zone *zone, struct mem_section *ms,
 
        scn_nr = __section_nr(ms);
        start_pfn = section_nr_to_pfn((unsigned long)scn_nr);
-       __remove_zone(zone, start_pfn);
 
        sparse_remove_one_section(ms, map_offset, altmap);
 }
 
 /**
- * __remove_pages() - remove sections of pages from a zone
- * @zone: zone from which pages need to be removed
+ * __remove_pages() - remove sections of pages
  * @phys_start_pfn: starting pageframe (must be aligned to start of a section)
  * @nr_pages: number of pages to remove (must be multiple of section size)
  * @altmap: alternative device page map or %NULL if default memmap is used
@@ -500,8 +502,8 @@ static void __remove_section(struct zone *zone, struct mem_section *ms,
  * sure that pages are marked reserved and zones are adjust properly by
  * calling offline_pages().
  */
-void __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
-                   unsigned long nr_pages, struct vmem_altmap *altmap)
+void __remove_pages(unsigned long phys_start_pfn, unsigned long nr_pages,
+                   struct vmem_altmap *altmap)
 {
        unsigned long i;
        unsigned long map_offset = 0;
@@ -510,8 +512,6 @@ void __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
        if (altmap)
                map_offset = vmem_altmap_offset(altmap);
 
-       clear_zone_contiguous(zone);
-
        /*
         * We can only remove entire sections
         */
@@ -523,12 +523,9 @@ void __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
                unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
 
                cond_resched();
-               __remove_section(zone, __pfn_to_section(pfn), map_offset,
-                                altmap);
+               __remove_section(__pfn_to_section(pfn), map_offset, altmap);
                map_offset = 0;
        }
-
-       set_zone_contiguous(zone);
 }
 
 int set_online_page_callback(online_page_callback_t callback)
@@ -898,6 +895,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
                 (unsigned long long) pfn << PAGE_SHIFT,
                 (((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
        memory_notify(MEM_CANCEL_ONLINE, &arg);
+       remove_pfn_range_from_zone(zone, pfn, nr_pages);
        mem_hotplug_done();
        return ret;
 }
@@ -1682,6 +1680,7 @@ static int __ref __offline_pages(unsigned long start_pfn,
        writeback_set_ratelimit();
 
        memory_notify(MEM_OFFLINE, &arg);
+       remove_pfn_range_from_zone(zone, start_pfn, nr_pages);
        mem_hotplug_done();
        return 0;
 