s390/cmma: rework no-dat handling

Rework the way physical pages are set no-dat / dat:

The old way is:

- Rely on all pages being initially marked "dat"
- Allocate page tables for the kernel mapping
- Enable dat
- Walk the whole kernel mapping and set PG_arch_1 bit in all struct pages
  that belong to pages of kernel page tables
- Walk all struct pages and test and clear the PG_arch_1 bit. If the bit is
  not set, set the page state to no-dat
- For all subsequent page table allocations, set the page state to dat
  (remove the no-dat state) at allocation time

Change this rather complex logic to a simpler approach:

- Set the whole physical memory (all pages) to "no-dat"
- Explicitly set those page table pages to "dat" which are part of the
  kernel image (e.g. swapper_pg_dir)
- For all subsequent page table allocations, set the page state to dat
  (remove the no-dat state) at allocation time

As a result the code is simpler, and this also makes it possible to get
rid of one odd usage of the PG_arch_1 bit.

Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
This commit is contained in:
Heiko Carstens 2023-10-27 14:12:39 +02:00 committed by Vasily Gorbik
parent 65d37f163a
commit a51324c430
5 changed files with 21 additions and 131 deletions

View file

@ -2,6 +2,7 @@
#include <linux/sched/task.h>
#include <linux/pgtable.h>
#include <linux/kasan.h>
#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/facility.h>
#include <asm/sections.h>
@ -70,6 +71,10 @@ static void kasan_populate_shadow(void)
crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z));
crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z));
memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE);
__arch_set_page_dat(kasan_early_shadow_p4d, 1UL << CRST_ALLOC_ORDER);
__arch_set_page_dat(kasan_early_shadow_pud, 1UL << CRST_ALLOC_ORDER);
__arch_set_page_dat(kasan_early_shadow_pmd, 1UL << CRST_ALLOC_ORDER);
__arch_set_page_dat(kasan_early_shadow_pte, 1);
/*
* Current memory layout:
@ -223,6 +228,7 @@ static void *boot_crst_alloc(unsigned long val)
table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size);
crst_table_init(table, val);
__arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
return table;
}
@ -238,6 +244,7 @@ static pte_t *boot_pte_alloc(void)
if (!pte_leftover) {
pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
pte = pte_leftover + _PAGE_TABLE_SIZE;
__arch_set_page_dat(pte, 1);
} else {
pte = pte_leftover;
pte_leftover = NULL;
@ -418,6 +425,14 @@ void setup_vmem(unsigned long asce_limit)
unsigned long asce_bits;
int i;
/*
* Mark whole memory as no-dat. This must be done before any
* page tables are allocated, or kernel image builtin pages
* are marked as dat tables.
*/
for_each_physmem_online_range(i, &start, &end)
__arch_set_page_nodat((void *)start, (end - start) >> PAGE_SHIFT);
if (asce_limit == _REGION1_SIZE) {
asce_type = _REGION2_ENTRY_EMPTY;
asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
@ -429,6 +444,8 @@ void setup_vmem(unsigned long asce_limit)
crst_table_init((unsigned long *)swapper_pg_dir, asce_type);
crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY);
__arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER);
__arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER);
/*
* To allow prefixing the lowcore must be mapped with 4KB pages.

View file

@ -125,8 +125,6 @@ static inline void vmcp_cma_reserve(void) { }
void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault);
void cmma_init_nodat(void);
extern void (*_machine_restart)(char *command);
extern void (*_machine_halt)(void);
extern void (*_machine_power_off)(void);

View file

@ -168,8 +168,6 @@ void __init mem_init(void)
/* this will put all low memory onto the freelists */
memblock_free_all();
setup_zero_pages(); /* Setup zeroed pages. */
cmma_init_nodat();
}
void free_initmem(void)

View file

@ -7,136 +7,13 @@
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
*/
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/memblock.h>
#include <linux/gfp.h>
#include <linux/init.h>
#include <asm/asm-extable.h>
#include <asm/facility.h>
#include <asm/page-states.h>
#include <asm/sections.h>
#include <asm/page.h>
int __bootdata_preserved(cmma_flag);
/*
 * Walk the pmd entries below @pud that cover [addr, end) and set the
 * PG_arch_1 bit in the struct page referenced by each entry.
 *
 * Entries that are empty (pmd_none) or large (pmd_large) are skipped.
 */
static void mark_kernel_pmd(pud_t *pud, unsigned long addr, unsigned long end)
{
unsigned long next;
struct page *page;
pmd_t *pmd;
pmd = pmd_offset(pud, addr);
do {
next = pmd_addr_end(addr, end);
/*
 * "continue" in a do/while jumps to the loop condition, so the
 * pmd++ / addr = next advance below still happens for skipped
 * entries.
 */
if (pmd_none(*pmd) || pmd_large(*pmd))
continue;
/* Tag the page this pmd entry points to. */
page = phys_to_page(pmd_val(*pmd));
set_bit(PG_arch_1, &page->flags);
} while (pmd++, addr = next, addr != end);
}
/*
 * Walk the pud entries below @p4d that cover [addr, end). For every entry
 * that is neither empty nor large, set PG_arch_1 in the struct pages of the
 * table it references (unless the pud level is folded) and then descend
 * into the pmd level.
 */
static void mark_kernel_pud(p4d_t *p4d, unsigned long addr, unsigned long end)
{
unsigned long next;
struct page *page;
pud_t *pud;
int i;
pud = pud_offset(p4d, addr);
do {
next = pud_addr_end(addr, end);
/* "continue" still runs the advance in the while condition. */
if (pud_none(*pud) || pud_large(*pud))
continue;
if (!pud_folded(*pud)) {
page = phys_to_page(pud_val(*pud));
/*
 * Four consecutive struct pages are tagged.
 * NOTE(review): presumably the size of one region/segment
 * table (1UL << CRST_ALLOC_ORDER pages) - confirm.
 */
for (i = 0; i < 4; i++)
set_bit(PG_arch_1, &page[i].flags);
}
mark_kernel_pmd(pud, addr, next);
} while (pud++, addr = next, addr != end);
}
/*
 * Walk the p4d entries below @pgd that cover [addr, end). For every
 * non-empty entry, set PG_arch_1 in the struct pages of the table it
 * references (unless the p4d level is folded) and then descend into the
 * pud level.
 */
static void mark_kernel_p4d(pgd_t *pgd, unsigned long addr, unsigned long end)
{
unsigned long next;
struct page *page;
p4d_t *p4d;
int i;
p4d = p4d_offset(pgd, addr);
do {
next = p4d_addr_end(addr, end);
/* "continue" still runs the advance in the while condition. */
if (p4d_none(*p4d))
continue;
if (!p4d_folded(*p4d)) {
page = phys_to_page(p4d_val(*p4d));
/*
 * Four consecutive struct pages are tagged.
 * NOTE(review): presumably the size of one region table
 * (1UL << CRST_ALLOC_ORDER pages) - confirm.
 */
for (i = 0; i < 4; i++)
set_bit(PG_arch_1, &page[i].flags);
}
mark_kernel_pud(p4d, addr, next);
} while (p4d++, addr = next, addr != end);
}
/*
 * Walk the kernel page table from virtual address 0 up to the highest
 * address reachable through the kernel ASCE. For every non-empty pgd entry,
 * set PG_arch_1 in the struct pages of the table it references (unless the
 * pgd level is folded) and then descend into the p4d level.
 */
static void mark_kernel_pgd(void)
{
unsigned long addr, next, max_addr;
struct page *page;
pgd_t *pgd;
int i;
addr = 0;
/*
* Figure out maximum virtual address accessible with the
* kernel ASCE. This is required to keep the page table walker
* from accessing non-existent entries.
*/
/* Extract the table-type field of the ASCE and turn it into a size. */
max_addr = (S390_lowcore.kernel_asce.val & _ASCE_TYPE_MASK) >> 2;
max_addr = 1UL << (max_addr * 11 + 31);
pgd = pgd_offset_k(addr);
do {
next = pgd_addr_end(addr, max_addr);
/* "continue" still runs the advance in the while condition. */
if (pgd_none(*pgd))
continue;
if (!pgd_folded(*pgd)) {
page = phys_to_page(pgd_val(*pgd));
/*
 * Four consecutive struct pages are tagged.
 * NOTE(review): presumably the size of one region table
 * (1UL << CRST_ALLOC_ORDER pages) - confirm.
 */
for (i = 0; i < 4; i++)
set_bit(PG_arch_1, &page[i].flags);
}
mark_kernel_p4d(pgd, addr, next);
} while (pgd++, addr = next, addr != max_addr);
}
/*
 * Set the no-dat page state for all pages that are not used as kernel
 * page tables.
 *
 * Works in two passes: first PG_arch_1 is set in the struct pages of all
 * kernel page table pages (including swapper_pg_dir and invalid_pg_dir),
 * then every struct page in every memory range is visited; pages with
 * PG_arch_1 set only get the bit cleared again, pages on a list are
 * skipped, and all remaining pages are set to stable/no-dat.
 */
void __init cmma_init_nodat(void)
{
struct page *page;
unsigned long start, end, ix;
int i;
/*
 * NOTE(review): presumably cmma_flag values below 2 mean the no-dat
 * facility is not in use - confirm against where cmma_flag is set.
 */
if (cmma_flag < 2)
return;
/* Mark pages used in kernel page tables */
mark_kernel_pgd();
/* The top-level tables themselves are not reached by the walk above. */
page = virt_to_page(&swapper_pg_dir);
for (i = 0; i < 4; i++)
set_bit(PG_arch_1, &page[i].flags);
page = virt_to_page(&invalid_pg_dir);
for (i = 0; i < 4; i++)
set_bit(PG_arch_1, &page[i].flags);
/* Set all kernel pages not used for page tables to stable/no-dat */
for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) {
page = pfn_to_page(start);
for (ix = start; ix < end; ix++, page++) {
/* The marker bit is consumed here so it does not stay set. */
if (__test_and_clear_bit(PG_arch_1, &page->flags))
continue; /* skip page table pages */
if (!list_empty(&page->lru))
continue; /* skip free pages */
__set_page_stable_nodat(page_to_virt(page), 1);
}
}
}
void arch_free_page(struct page *page, int order)
{
if (!cmma_flag)

View file

@ -50,8 +50,7 @@ void *vmem_crst_alloc(unsigned long val)
if (!table)
return NULL;
crst_table_init(table, val);
if (slab_is_available())
__arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
__arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
return table;
}
@ -67,6 +66,7 @@ pte_t __ref *vmem_pte_alloc(void)
if (!pte)
return NULL;
memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
__arch_set_page_dat(pte, 1);
return pte;
}