Skip to content

Commit

Permalink
mm: discard memblock data later
Browse files Browse the repository at this point in the history
There is existing use after free bug when deferred struct pages are
enabled:

The memblock_add() allocates memory for the memory array if more than
128 entries are needed.  See comment in e820__memblock_setup():

  * The bootstrap memblock region count maximum is 128 entries
  * (INIT_MEMBLOCK_REGIONS), but EFI might pass us more E820 entries
  * than that - so allow memblock resizing.

This memblock memory is freed here:
        free_low_memory_core_early()

We access the freed memblock.memory later in boot when deferred pages
are initialized in this path:

        deferred_init_memmap()
                for_each_mem_pfn_range()
                  __next_mem_pfn_range()
                    type = &memblock.memory;

One possible explanation for why this use-after-free hasn't been hit
before is that the limit of INIT_MEMBLOCK_REGIONS has never been
exceeded at least on systems where deferred struct pages were enabled.

Tested by reducing INIT_MEMBLOCK_REGIONS down to 4 from the current 128,
and verifying in qemu that this code is getting excuted and that the
freed pages are sane.

Link: http://lkml.kernel.org/r/[email protected]
Fixes: 7e18adb ("mm: meminit: initialise remaining struct pages in parallel with kswapd")
Signed-off-by: Pavel Tatashin <[email protected]>
Reviewed-by: Steven Sistare <[email protected]>
Reviewed-by: Daniel Jordan <[email protected]>
Reviewed-by: Bob Picco <[email protected]>
Acked-by: Michal Hocko <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
Pavel Tatashin authored and torvalds committed Aug 18, 2017
1 parent 768dc4e commit 3010f87
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 39 deletions.
6 changes: 4 additions & 2 deletions include/linux/memblock.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ extern int memblock_debug;
#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
#define __init_memblock __meminit
#define __initdata_memblock __meminitdata
void memblock_discard(void);
#else
#define __init_memblock
#define __initdata_memblock
Expand All @@ -74,8 +75,6 @@ phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align,
int nid, ulong flags);
phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
phys_addr_t size, phys_addr_t align);
phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr);
phys_addr_t get_allocated_memblock_memory_regions_info(phys_addr_t *addr);
void memblock_allow_resize(void);
int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid);
int memblock_add(phys_addr_t base, phys_addr_t size);
Expand Down Expand Up @@ -110,6 +109,9 @@ void __next_mem_range_rev(u64 *idx, int nid, ulong flags,
void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start,
phys_addr_t *out_end);

void __memblock_free_early(phys_addr_t base, phys_addr_t size);
void __memblock_free_late(phys_addr_t base, phys_addr_t size);

/**
* for_each_mem_range - iterate through memblock areas from type_a and not
* included in type_b. Or just type_a if type_b is NULL.
Expand Down
38 changes: 17 additions & 21 deletions mm/memblock.c
Original file line number Diff line number Diff line change
Expand Up @@ -285,31 +285,27 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u
}

#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK

phys_addr_t __init_memblock get_allocated_memblock_reserved_regions_info(
phys_addr_t *addr)
{
if (memblock.reserved.regions == memblock_reserved_init_regions)
return 0;

*addr = __pa(memblock.reserved.regions);

return PAGE_ALIGN(sizeof(struct memblock_region) *
memblock.reserved.max);
}

phys_addr_t __init_memblock get_allocated_memblock_memory_regions_info(
phys_addr_t *addr)
/**
* Discard memory and reserved arrays if they were allocated
*/
void __init memblock_discard(void)
{
if (memblock.memory.regions == memblock_memory_init_regions)
return 0;
phys_addr_t addr, size;

*addr = __pa(memblock.memory.regions);
if (memblock.reserved.regions != memblock_reserved_init_regions) {
addr = __pa(memblock.reserved.regions);
size = PAGE_ALIGN(sizeof(struct memblock_region) *
memblock.reserved.max);
__memblock_free_late(addr, size);
}

return PAGE_ALIGN(sizeof(struct memblock_region) *
memblock.memory.max);
if (memblock.memory.regions == memblock_memory_init_regions) {
addr = __pa(memblock.memory.regions);
size = PAGE_ALIGN(sizeof(struct memblock_region) *
memblock.memory.max);
__memblock_free_late(addr, size);
}
}

#endif

/**
Expand Down
16 changes: 0 additions & 16 deletions mm/nobootmem.c
Original file line number Diff line number Diff line change
Expand Up @@ -146,22 +146,6 @@ static unsigned long __init free_low_memory_core_early(void)
NULL)
count += __free_memory_core(start, end);

#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
{
phys_addr_t size;

/* Free memblock.reserved array if it was allocated */
size = get_allocated_memblock_reserved_regions_info(&start);
if (size)
count += __free_memory_core(start, start + size);

/* Free memblock.memory array if it was allocated */
size = get_allocated_memblock_memory_regions_info(&start);
if (size)
count += __free_memory_core(start, start + size);
}
#endif

return count;
}

Expand Down
4 changes: 4 additions & 0 deletions mm/page_alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1584,6 +1584,10 @@ void __init page_alloc_init_late(void)
/* Reinit limits that are based on free pages after the kernel is up */
files_maxfiles_init();
#endif
#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
/* Discard memblock private memory */
memblock_discard();
#endif

for_each_populated_zone(zone)
set_zone_contiguous(zone);
Expand Down

0 comments on commit 3010f87

Please sign in to comment.