Merge tag 'memblock-v5.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock

Pull memblock updates from Mike Rapoport:
 "Fix arm crashes caused by holes in the memory map.

  The coordination between freeing of unused memory map, pfn_valid() and
  core mm assumptions about validity of the memory map in various ranges
  was not designed for complex layouts of the physical memory with a lot
  of holes all over the place.

  Kefeng Wang reported crashes in move_freepages() on a system with the
  following memory layout [1]:

	node 0: [mem 0x0000000080a00000-0x00000000855fffff]
	node 0: [mem 0x0000000086a00000-0x0000000087dfffff]
	node 0: [mem 0x000000008bd00000-0x000000008c4fffff]
	node 0: [mem 0x000000008e300000-0x000000008ecfffff]
	node 0: [mem 0x0000000090d00000-0x00000000bfffffff]
	node 0: [mem 0x00000000cc000000-0x00000000dc9fffff]
	node 0: [mem 0x00000000de700000-0x00000000de9fffff]
	node 0: [mem 0x00000000e0800000-0x00000000e0bfffff]
	node 0: [mem 0x00000000f4b00000-0x00000000f6ffffff]
	node 0: [mem 0x00000000fda00000-0x00000000ffffefff]

  These crashes can be mitigated by enabling CONFIG_HOLES_IN_ZONE on ARM
  and essentially turning pfn_valid_within() into pfn_valid() instead of
  having it hardwired to 1 on that architecture, but this would require
  keeping CONFIG_HOLES_IN_ZONE solely for this purpose.

  A cleaner approach is to update ARM's implementation of pfn_valid() to
  take into account the rounding of the freed memory map to pageblock
  boundaries and make sure it returns true for PFNs that have memory map
  entries even if there is no physical memory backing those PFNs"

Link: https://lore.kernel.org/lkml/2a1592ad-bc9d-4664-fd19-f7448a37edc0@huawei.com [1]
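
For context, pfn_valid_within() was at the time a trivial wrapper defined
in include/linux/mmzone.h; a sketch of the pre-5.14 definition:

	#ifdef CONFIG_HOLES_IN_ZONE
	/* zones may have holes in the memory map: re-check every PFN */
	#define pfn_valid_within(pfn) pfn_valid(pfn)
	#else
	/* assume there are no holes inside a MAX_ORDER-aligned block */
	#define pfn_valid_within(pfn) (1)
	#endif

With the macro hardwired to 1, callers such as move_freepages() walk all
pages of a pageblock without re-validating each PFN, which is what tripped
over the holes in the layout above.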

* tag 'memblock-v5.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock:
  arm: extend pfn_valid to take into account freed memory map alignment
  memblock: ensure there is no overflow in memblock_overlaps_region()
  memblock: align freed memory map on pageblock boundaries with SPARSEMEM
  memblock: free_unused_memmap: use pageblock units instead of MAX_ORDER
commit a412897fb5 by Linus Torvalds, 2021-07-04 12:23:05 -07:00
2 changed files with 27 additions and 14 deletions

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -125,11 +125,22 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max_low,
 int pfn_valid(unsigned long pfn)
 {
 	phys_addr_t addr = __pfn_to_phys(pfn);
+	unsigned long pageblock_size = PAGE_SIZE * pageblock_nr_pages;
 
 	if (__phys_to_pfn(addr) != pfn)
 		return 0;
 
-	return memblock_is_map_memory(addr);
+	/*
+	 * If address less than pageblock_size bytes away from a present
+	 * memory chunk there still will be a memory map entry for it
+	 * because we round freed memory map to the pageblock boundaries.
+	 */
+	if (memblock_overlaps_region(&memblock.memory,
+				     ALIGN_DOWN(addr, pageblock_size),
+				     pageblock_size))
+		return 1;
+
+	return 0;
 }
 EXPORT_SYMBOL(pfn_valid);
 #endif
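
Worked example (a standalone sketch, not kernel code): take the bank
[mem 0x90d00000-0xbfffffff] from the report and assume a 2 MB pageblock
(4 KB pages with pageblock_order == 9; the real size depends on the
configuration). The bank start is not pageblock-aligned, so the freed
memory map is rounded down to 0x90c00000, and the new check accepts a PFN
in that rounded-down region:

	#include <stdio.h>
	#include <stdint.h>

	#define ALIGN_DOWN(x, a)	((x) & ~((uint64_t)(a) - 1))

	int main(void)
	{
		uint64_t pageblock_size = 0x200000;	/* 2 MB, assumed */
		uint64_t bank_base = 0x90d00000;	/* bank from the report */
		uint64_t bank_end  = 0xc0000000;	/* exclusive end */
		uint64_t addr = 0x90c00000;		/* hole just below the bank */

		uint64_t start = ALIGN_DOWN(addr, pageblock_size);
		uint64_t end = start + pageblock_size;

		/* single-region equivalent of memblock_overlaps_region() */
		int overlaps = start < bank_end && bank_base < end;

		printf("[%#llx-%#llx) overlaps the bank: %d\n",
		       (unsigned long long)start, (unsigned long long)end,
		       overlaps);
		return 0;
	}

This prints "[0x90c00000-0x90e00000) overlaps the bank: 1": the old
memblock_is_map_memory(addr) check said no for that address, even though
a memory map entry exists for it because free_unused_memmap() keeps the
map out to the pageblock boundary.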

diff --git a/mm/memblock.c b/mm/memblock.c
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -182,6 +182,8 @@ bool __init_memblock memblock_overlaps_region(struct memblock_type *type,
 {
 	unsigned long i;
 
+	memblock_cap_size(base, &size);
+
 	for (i = 0; i < type->cnt; i++)
 		if (memblock_addrs_overlap(base, size, type->regions[i].base,
 					   type->regions[i].size))
@@ -1799,7 +1801,6 @@ bool __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size)
  */
 bool __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size)
 {
-	memblock_cap_size(base, &size);
 	return memblock_overlaps_region(&memblock.reserved, base, size);
 }
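
The overflow in question: memblock_addrs_overlap() compares base + size of
both ranges, and a range ending at the top of the physical address space
can wrap around. memblock_cap_size() clamps the size first; its definition
in mm/memblock.c is roughly:

	/* adjust *size so that (base + *size) doesn't overflow, clamp if needed */
	static phys_addr_t __init_memblock memblock_cap_size(phys_addr_t base,
							     phys_addr_t *size)
	{
		return *size = min(*size, PHYS_ADDR_MAX - base);
	}

With the capping done inside memblock_overlaps_region() itself every
caller is protected, which is why the hunk above can drop the now-redundant
call from memblock_is_region_reserved().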
@@ -1946,14 +1947,13 @@ static void __init free_unused_memmap(void)
 		 * due to SPARSEMEM sections which aren't present.
 		 */
 		start = min(start, ALIGN(prev_end, PAGES_PER_SECTION));
-#else
-		/*
-		 * Align down here since the VM subsystem insists that the
-		 * memmap entries are valid from the bank start aligned to
-		 * MAX_ORDER_NR_PAGES.
-		 */
-		start = round_down(start, MAX_ORDER_NR_PAGES);
 #endif
+		/*
+		 * Align down here since many operations in VM subsystem
+		 * presume that there are no holes in the memory map inside
+		 * a pageblock
+		 */
+		start = round_down(start, pageblock_nr_pages);
 
 		/*
 		 * If we had a previous bank, and there is a space
@@ -1963,16 +1963,18 @@ static void __init free_unused_memmap(void)
 			free_memmap(prev_end, start);
 
 		/*
-		 * Align up here since the VM subsystem insists that the
-		 * memmap entries are valid from the bank end aligned to
-		 * MAX_ORDER_NR_PAGES.
+		 * Align up here since many operations in VM subsystem
+		 * presume that there are no holes in the memory map inside
+		 * a pageblock
 		 */
-		prev_end = ALIGN(end, MAX_ORDER_NR_PAGES);
+		prev_end = ALIGN(end, pageblock_nr_pages);
 	}
 
 #ifdef CONFIG_SPARSEMEM
-	if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION))
+	if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION)) {
+		prev_end = ALIGN(end, pageblock_nr_pages);
 		free_memmap(prev_end, ALIGN(prev_end, PAGES_PER_SECTION));
+	}
 #endif
 }
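
Applied to the reported layout, and again assuming a 2 MB pageblock, the
rounding keeps the memory map around each bank out to pageblock rather
than MAX_ORDER boundaries. For the bank [mem 0x8bd00000-0x8c4fffff]:

	start    = round_down(0x8bd00000, 2 MB) = 0x8bc00000
	prev_end = ALIGN(0x8c500000, 2 MB)      = 0x8c600000

so memory map entries are kept for [0x8bc00000-0x8c600000) although RAM
covers only [0x8bd00000-0x8c500000); the extended pfn_valid() above
reports exactly these extra PFNs as valid, keeping the two sides
consistent.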