linux/arch/s390/include/asm/pgalloc.h
Martin Schwidefsky cd94154cc6 [S390] fix tlb flushing for page table pages
Git commit 36409f6353 "use generic RCU
page-table freeing code" introduced a tlb flushing bug. Partially revert
the above git commit and go back to s390 specific page table flush code.

For s390 the TLB can contain three types of entries, "normal" TLB
page-table entries, TLB combined region-and-segment-table (CRST) entries
and real-space entries. Linux does not use real-space entries which
leaves normal TLB entries and CRST entries. The CRST entries are
intermediate steps in the page-table translation called translation paths.
For example a 4K page access in a three-level page table setup will
create two CRST TLB entries and one page-table TLB entry. The advantage
of that approach is that a page access next to the previous one can reuse
the CRST entries and needs just a single read from memory to create the
page-table TLB entry. The disadvantage is that the TLB flushing rules are
more complicated, before any page-table may be freed the TLB needs to be
flushed.

In short: the generic RCU page-table freeing code is incorrect for the
CRST entries, in particular the check for mm_users < 2 is troublesome.

This is applicable to 3.0+ kernels.

Cc: <stable@vger.kernel.org>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2012-04-11 14:28:24 +02:00

143 lines
3.9 KiB
C

/*
* include/asm-s390/pgalloc.h
*
* S390 version
* Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation
* Author(s): Hartmut Penner (hp@de.ibm.com)
* Martin Schwidefsky (schwidefsky@de.ibm.com)
*
* Derived from "include/asm-i386/pgalloc.h"
* Copyright (C) 1994 Linus Torvalds
*/
#ifndef _S390_PGALLOC_H
#define _S390_PGALLOC_H
#include <linux/threads.h>
#include <linux/gfp.h>
#include <linux/mm.h>
unsigned long *crst_table_alloc(struct mm_struct *);
void crst_table_free(struct mm_struct *, unsigned long *);
unsigned long *page_table_alloc(struct mm_struct *, unsigned long);
void page_table_free(struct mm_struct *, unsigned long *);
void page_table_free_rcu(struct mmu_gather *, unsigned long *);
static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
{
typedef struct { char _[n]; } addrtype;
*s = val;
n = (n / 256) - 1;
asm volatile(
#ifdef CONFIG_64BIT
" mvc 8(248,%0),0(%0)\n"
#else
" mvc 4(252,%0),0(%0)\n"
#endif
"0: mvc 256(256,%0),0(%0)\n"
" la %0,256(%0)\n"
" brct %1,0b\n"
: "+a" (s), "+d" (n), "=m" (*(addrtype *) s)
: "m" (*(addrtype *) s));
}
static inline void crst_table_init(unsigned long *crst, unsigned long entry)
{
clear_table(crst, entry, sizeof(unsigned long)*2048);
}
#ifndef __s390x__
static inline unsigned long pgd_entry_type(struct mm_struct *mm)
{
return _SEGMENT_ENTRY_EMPTY;
}
#define pud_alloc_one(mm,address) ({ BUG(); ((pud_t *)2); })
#define pud_free(mm, x) do { } while (0)
#define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); })
#define pmd_free(mm, x) do { } while (0)
#define pgd_populate(mm, pgd, pud) BUG()
#define pud_populate(mm, pud, pmd) BUG()
#else /* __s390x__ */
static inline unsigned long pgd_entry_type(struct mm_struct *mm)
{
if (mm->context.asce_limit <= (1UL << 31))
return _SEGMENT_ENTRY_EMPTY;
if (mm->context.asce_limit <= (1UL << 42))
return _REGION3_ENTRY_EMPTY;
return _REGION2_ENTRY_EMPTY;
}
int crst_table_upgrade(struct mm_struct *, unsigned long limit);
void crst_table_downgrade(struct mm_struct *, unsigned long limit);
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
{
unsigned long *table = crst_table_alloc(mm);
if (table)
crst_table_init(table, _REGION3_ENTRY_EMPTY);
return (pud_t *) table;
}
#define pud_free(mm, pud) crst_table_free(mm, (unsigned long *) pud)
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
{
unsigned long *table = crst_table_alloc(mm);
if (table)
crst_table_init(table, _SEGMENT_ENTRY_EMPTY);
return (pmd_t *) table;
}
#define pmd_free(mm, pmd) crst_table_free(mm, (unsigned long *) pmd)
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
{
pgd_val(*pgd) = _REGION2_ENTRY | __pa(pud);
}
static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
{
pud_val(*pud) = _REGION3_ENTRY | __pa(pmd);
}
#endif /* __s390x__ */
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
spin_lock_init(&mm->context.list_lock);
INIT_LIST_HEAD(&mm->context.pgtable_list);
INIT_LIST_HEAD(&mm->context.gmap_list);
return (pgd_t *) crst_table_alloc(mm);
}
#define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd)
static inline void pmd_populate(struct mm_struct *mm,
pmd_t *pmd, pgtable_t pte)
{
pmd_val(*pmd) = _SEGMENT_ENTRY + __pa(pte);
}
#define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte)
#define pmd_pgtable(pmd) \
(pgtable_t)(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE)
/*
* page table entry allocation/free routines.
*/
#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr))
#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr))
#define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
#define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
extern void rcu_table_freelist_finish(void);
#endif /* _S390_PGALLOC_H */