Merge branch 'mm-hotfixes-stable' into mm-stable

Andrew Morton 2022-11-30 14:58:42 -08:00
commit a38358c934
66 changed files with 36536 additions and 35943 deletions

View file

@ -29,6 +29,7 @@ Alexandre Belloni <alexandre.belloni@bootlin.com> <alexandre.belloni@free-electr
Alexei Starovoitov <ast@kernel.org> <alexei.starovoitov@gmail.com>
Alexei Starovoitov <ast@kernel.org> <ast@fb.com>
Alexei Starovoitov <ast@kernel.org> <ast@plumgrid.com>
Alex Hung <alexhung@gmail.com> <alex.hung@canonical.com>
Alex Shi <alexs@kernel.org> <alex.shi@intel.com>
Alex Shi <alexs@kernel.org> <alex.shi@linaro.org>
Alex Shi <alexs@kernel.org> <alex.shi@linux.alibaba.com>
@ -382,6 +383,7 @@ Santosh Shilimkar <santosh.shilimkar@oracle.org>
Santosh Shilimkar <ssantosh@kernel.org>
Sarangdhar Joshi <spjoshi@codeaurora.org>
Sascha Hauer <s.hauer@pengutronix.de>
Satya Priya <quic_c_skakit@quicinc.com> <skakit@codeaurora.org>
S.Çağlar Onur <caglar@pardus.org.tr>
Sean Christopherson <seanjc@google.com> <sean.j.christopherson@intel.com>
Sean Nyekjaer <sean@geanix.com> <sean.nyekjaer@prevas.dk>

View file

@ -67,6 +67,7 @@ uninitialized in the local variable, as well as the stack where the value was
copied to another memory location before use.
A use of uninitialized value ``v`` is reported by KMSAN in the following cases:
- in a condition, e.g. ``if (v) { ... }``;
- in an indexing or pointer dereferencing, e.g. ``array[v]`` or ``*v``;
- when it is copied to userspace or hardware, e.g. ``copy_to_user(..., &v, ...)``;
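For illustration, a minimal userspace analogue of the cases listed above (hypothetical code; MemorySanitizer is the userspace counterpart that flags the same patterns, since KMSAN itself only runs inside the kernel):

#include <stdlib.h>

/* Hypothetical example: `n` is never initialized, so both the branch and the
 * array index below are uses of an uninitialized value that the sanitizer
 * reports. */
static int pick(void)
{
        int n;                            /* uninitialized on purpose */
        int *buf = malloc(16 * sizeof(*buf));

        if (!buf)
                return -1;
        if (n > 0)                        /* reported: condition on `n` */
                buf[n & 15] = 1;          /* reported: index derived from `n` */
        free(buf);
        return 0;
}

int main(void)
{
        return pick();
}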

View file

@ -10288,7 +10288,7 @@ T: git https://github.com/intel/gvt-linux.git
F: drivers/gpu/drm/i915/gvt/
INTEL HID EVENT DRIVER
M: Alex Hung <alex.hung@canonical.com>
M: Alex Hung <alexhung@gmail.com>
L: platform-driver-x86@vger.kernel.org
S: Maintained
F: drivers/platform/x86/intel/hid.c

View file

@ -480,6 +480,7 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd)
return pmd;
}
#define pmd_young pmd_young
static inline int pmd_young(pmd_t pmd)
{
return !!(pmd_val(pmd) & _PAGE_ACCESSED);

View file

@ -620,6 +620,7 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd)
return pmd;
}
#define pmd_young pmd_young
static inline int pmd_young(pmd_t pmd)
{
return !!(pmd_val(pmd) & _PAGE_ACCESSED);

View file

@ -600,6 +600,7 @@ static inline int pmd_dirty(pmd_t pmd)
return pte_dirty(pmd_pte(pmd));
}
#define pmd_young pmd_young
static inline int pmd_young(pmd_t pmd)
{
return pte_young(pmd_pte(pmd));

View file

@ -763,6 +763,7 @@ static inline int pmd_dirty(pmd_t pmd)
return (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0;
}
#define pmd_young pmd_young
static inline int pmd_young(pmd_t pmd)
{
return (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;

View file

@ -693,6 +693,7 @@ static inline unsigned long pmd_dirty(pmd_t pmd)
return pte_dirty(pte);
}
#define pmd_young pmd_young
static inline unsigned long pmd_young(pmd_t pmd)
{
pte_t pte = __pte(pmd_val(pmd));

View file

@ -139,6 +139,7 @@ static inline int pmd_dirty(pmd_t pmd)
return pmd_flags(pmd) & _PAGE_DIRTY;
}
#define pmd_young pmd_young
static inline int pmd_young(pmd_t pmd)
{
return pmd_flags(pmd) & _PAGE_ACCESSED;
@ -1438,6 +1439,14 @@ static inline bool arch_has_hw_pte_young(void)
return true;
}
#ifdef CONFIG_XEN_PV
#define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young
static inline bool arch_has_hw_nonleaf_pmd_young(void)
{
return !cpu_feature_enabled(X86_FEATURE_XENPV);
}
#endif
#ifdef CONFIG_PAGE_TABLE_CHECK
static inline bool pte_user_accessible_page(pte_t pte)
{

View file

@ -15,6 +15,7 @@
#include <linux/context_tracking.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/kmsan.h>
#include <linux/spinlock.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
@ -301,6 +302,12 @@ static noinstr bool handle_bug(struct pt_regs *regs)
{
bool handled = false;
/*
* Normally @regs are unpoisoned by irqentry_enter(), but handle_bug()
* is a rare case that uses @regs without passing them to
* irqentry_enter().
*/
kmsan_unpoison_entry_regs(regs);
if (!is_valid_bugaddr(regs->ip))
return handled;

View file

@ -6,6 +6,7 @@
#include <linux/uaccess.h>
#include <linux/export.h>
#include <linux/instrumented.h>
#include <asm/tlbflush.h>
@ -44,7 +45,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
* called from other contexts.
*/
pagefault_disable();
instrument_copy_from_user_before(to, from, n);
ret = raw_copy_from_user(to, from, n);
instrument_copy_from_user_after(to, from, n, ret);
pagefault_enable();
return ret;

View file

@ -37,8 +37,12 @@ int pmd_huge(pmd_t pmd)
*/
int pud_huge(pud_t pud)
{
#if CONFIG_PGTABLE_LEVELS > 2
return !pud_none(pud) &&
(pud_val(pud) & (_PAGE_PRESENT|_PAGE_PSE)) != _PAGE_PRESENT;
#else
return 0;
#endif
}
#ifdef CONFIG_HUGETLB_PAGE

View file

@ -5,6 +5,7 @@ menu "Display Engine Configuration"
config DRM_AMD_DC
bool "AMD DC - Enable new display engine"
default y
depends on BROKEN || !CC_IS_CLANG || X86_64 || SPARC64 || ARM64
select SND_HDA_COMPONENT if SND_HDA_CORE
select DRM_AMD_DC_DCN if (X86 || PPC_LONG_DOUBLE_128)
help
@ -12,6 +13,12 @@ config DRM_AMD_DC
support for AMDGPU. This adds required support for Vega and
Raven ASICs.
calculate_bandwidth() is presently broken on all !(X86_64 || SPARC64 || ARM64)
architectures built with Clang (all released versions), whereby the stack
frame gets blown up to well over 5k. This would cause an immediate kernel
panic on most architectures. We'll revert this when the following bug report
has been resolved: https://github.com/llvm/llvm-project/issues/41896.
config DRM_AMD_DC_DCN
def_bool n
help

View file

@ -512,7 +512,7 @@ static u64 bio_end_offset(struct bio *bio)
static noinline int add_ra_bio_pages(struct inode *inode,
u64 compressed_end,
struct compressed_bio *cb,
unsigned long *pflags)
int *memstall, unsigned long *pflags)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
unsigned long end_index;
@ -581,8 +581,10 @@ static noinline int add_ra_bio_pages(struct inode *inode,
continue;
}
if (PageWorkingset(page))
if (!*memstall && PageWorkingset(page)) {
psi_memstall_enter(pflags);
*memstall = 1;
}
ret = set_page_extent_mapped(page);
if (ret < 0) {
@ -670,8 +672,8 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
u64 em_len;
u64 em_start;
struct extent_map *em;
/* Initialize to 1 to make skip psi_memstall_leave unless needed */
unsigned long pflags = 1;
unsigned long pflags;
int memstall = 0;
blk_status_t ret;
int ret2;
int i;
@ -727,7 +729,7 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
goto fail;
}
add_ra_bio_pages(inode, em_start + em_len, cb, &pflags);
add_ra_bio_pages(inode, em_start + em_len, cb, &memstall, &pflags);
/* include any pages we added in add_ra-bio_pages */
cb->len = bio->bi_iter.bi_size;
@ -807,7 +809,7 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
}
}
if (!pflags)
if (memstall)
psi_memstall_leave(&pflags);
if (refcount_dec_and_test(&cb->pending_ios))
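The btrfs hunks above (and the erofs ones below) stop overloading `pflags` as an "already entered" marker and track that in an explicit `memstall` flag instead. A small self-contained sketch of that bookkeeping pattern, with stub functions standing in for psi_memstall_enter()/psi_memstall_leave():

#include <stdbool.h>
#include <stdio.h>

/* Stubs standing in for the real psi accounting calls. */
static void psi_memstall_enter(unsigned long *pflags) { *pflags = 0; puts("enter"); }
static void psi_memstall_leave(unsigned long *pflags) { (void)pflags; puts("leave"); }

int main(void)
{
        unsigned long pflags;              /* opaque cookie owned by the psi code */
        int memstall = 0;                  /* our own "did we enter?" flag        */
        bool page_is_workingset[] = { false, true, true };

        for (int i = 0; i < 3; i++) {
                if (page_is_workingset[i] && !memstall) {
                        psi_memstall_enter(&pflags);   /* enter at most once */
                        memstall = 1;
                }
        }
        if (memstall)                       /* leave only if we actually entered */
                psi_memstall_leave(&pflags);
        return 0;
}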

View file

@ -1412,8 +1412,8 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
struct block_device *last_bdev;
unsigned int nr_bios = 0;
struct bio *bio = NULL;
/* initialize to 1 to make skip psi_memstall_leave unless needed */
unsigned long pflags = 1;
unsigned long pflags;
int memstall = 0;
bi_private = jobqueueset_init(sb, q, fgq, force_fg);
qtail[JQ_BYPASS] = &q[JQ_BYPASS]->head;
@ -1463,14 +1463,18 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
if (bio && (cur != last_index + 1 ||
last_bdev != mdev.m_bdev)) {
submit_bio_retry:
if (!pflags)
psi_memstall_leave(&pflags);
submit_bio(bio);
if (memstall) {
psi_memstall_leave(&pflags);
memstall = 0;
}
bio = NULL;
}
if (unlikely(PageWorkingset(page)))
if (unlikely(PageWorkingset(page)) && !memstall) {
psi_memstall_enter(&pflags);
memstall = 1;
}
if (!bio) {
bio = bio_alloc(mdev.m_bdev, BIO_MAX_VECS,
@ -1500,9 +1504,9 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
} while (owned_head != Z_EROFS_PCLUSTER_TAIL);
if (bio) {
if (!pflags)
psi_memstall_leave(&pflags);
submit_bio(bio);
if (memstall)
psi_memstall_leave(&pflags);
}
/*

View file

@ -328,6 +328,12 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
} else {
unlock_page(page);
if (PageHWPoison(page)) {
put_page(page);
retval = -EIO;
break;
}
/*
* We have the page, copy it to user space buffer.
*/
@ -1111,13 +1117,6 @@ static int hugetlbfs_migrate_folio(struct address_space *mapping,
static int hugetlbfs_error_remove_page(struct address_space *mapping,
struct page *page)
{
struct inode *inode = mapping->host;
pgoff_t index = page->index;
hugetlb_delete_from_page_cache(page);
if (unlikely(hugetlb_unreserve_pages(inode, index, index + 1, 1)))
hugetlb_fix_reserve_counts(inode);
return 0;
}

View file

@ -111,6 +111,13 @@ static void nilfs_dat_commit_free(struct inode *dat,
kunmap_atomic(kaddr);
nilfs_dat_commit_entry(dat, req);
if (unlikely(req->pr_desc_bh == NULL || req->pr_bitmap_bh == NULL)) {
nilfs_error(dat->i_sb,
"state inconsistency probably due to duplicate use of vblocknr = %llu",
(unsigned long long)req->pr_entry_nr);
return;
}
nilfs_palloc_commit_free_entry(dat, req);
}

View file

@ -317,7 +317,7 @@ void nilfs_relax_pressure_in_lock(struct super_block *sb)
struct the_nilfs *nilfs = sb->s_fs_info;
struct nilfs_sc_info *sci = nilfs->ns_writer;
if (!sci || !sci->sc_flush_request)
if (sb_rdonly(sb) || unlikely(!sci) || !sci->sc_flush_request)
return;
set_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags);
@ -2242,7 +2242,7 @@ int nilfs_construct_segment(struct super_block *sb)
struct nilfs_sc_info *sci = nilfs->ns_writer;
struct nilfs_transaction_info *ti;
if (!sci)
if (sb_rdonly(sb) || unlikely(!sci))
return -EROFS;
/* A call inside transactions causes a deadlock. */
@ -2280,7 +2280,7 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode,
struct nilfs_transaction_info ti;
int err = 0;
if (!sci)
if (sb_rdonly(sb) || unlikely(!sci))
return -EROFS;
nilfs_transaction_lock(sb, &ti, 0);
@ -2776,11 +2776,12 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root)
if (nilfs->ns_writer) {
/*
* This happens if the filesystem was remounted
* read/write after nilfs_error degenerated it into a
* read-only mount.
* This happens if the filesystem is made read-only by
* __nilfs_error or nilfs_remount and then remounted
* read/write. In these cases, reuse the existing
* writer.
*/
nilfs_detach_log_writer(sb);
return 0;
}
nilfs->ns_writer = nilfs_segctor_new(sb, root);

View file

@ -495,14 +495,22 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
{
struct buffer_head *bh;
void *kaddr;
struct nilfs_segment_usage *su;
int ret;
down_write(&NILFS_MDT(sufile)->mi_sem);
ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh);
if (!ret) {
mark_buffer_dirty(bh);
nilfs_mdt_mark_dirty(sufile);
kaddr = kmap_atomic(bh->b_page);
su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
nilfs_segment_usage_set_dirty(su);
kunmap_atomic(kaddr);
brelse(bh);
}
up_write(&NILFS_MDT(sufile)->mi_sem);
return ret;
}

View file

@ -1133,8 +1133,6 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))
goto out;
if (*flags & SB_RDONLY) {
/* Shutting down log writer */
nilfs_detach_log_writer(sb);
sb->s_flags |= SB_RDONLY;
/*

View file

@ -690,9 +690,7 @@ int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks)
{
unsigned long ncleansegs;
down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile);
up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
*nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment;
return 0;
}

View file

@ -115,7 +115,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
#endif
show_val_kb(m, "PageTables: ",
global_node_page_state(NR_PAGETABLE));
show_val_kb(m, "SecPageTables: ",
show_val_kb(m, "SecPageTables: ",
global_node_page_state(NR_SECONDARY_PAGETABLE));
show_val_kb(m, "NFS_Unstable: ", 0);

View file

@ -222,12 +222,16 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
#define tlb_needs_table_invalidate() (true)
#endif
void tlb_remove_table_sync_one(void);
#else
#ifdef tlb_needs_table_invalidate
#error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE
#endif
static inline void tlb_remove_table_sync_one(void) { }
#endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */

View file

@ -20,7 +20,6 @@ struct fault_attr {
atomic_t space;
unsigned long verbose;
bool task_filter;
bool no_warn;
unsigned long stacktrace_depth;
unsigned long require_start;
unsigned long require_end;
@ -32,6 +31,10 @@ struct fault_attr {
struct dentry *dname;
};
enum fault_flags {
FAULT_NOWARN = 1 << 0,
};
#define FAULT_ATTR_INITIALIZER { \
.interval = 1, \
.times = ATOMIC_INIT(1), \
@ -40,11 +43,11 @@ struct fault_attr {
.ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \
.verbose = 2, \
.dname = NULL, \
.no_warn = false, \
}
#define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER
int setup_fault_attr(struct fault_attr *attr, char *str);
bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags);
bool should_fail(struct fault_attr *attr, ssize_t size);
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
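A compact userspace model of the new calling convention (the names mirror the header above, but the bodies are stand-ins, not the kernel implementation): the caller passes FAULT_NOWARN per call instead of flipping shared no_warn state on the attr, which racing callers could clobber.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

enum fault_flags { FAULT_NOWARN = 1 << 0 };

struct fault_attr { int times; };

/* Stand-in for should_fail_ex(): whether to warn is a per-call decision. */
static bool should_fail_ex(struct fault_attr *attr, size_t size, int flags)
{
        (void)size;
        if (attr->times == 0)
                return false;
        if (!(flags & FAULT_NOWARN))
                fprintf(stderr, "FAULT_INJECTION: forcing a failure.\n");
        attr->times--;
        return true;
}

static bool should_fail(struct fault_attr *attr, size_t size)
{
        return should_fail_ex(attr, size, 0);    /* old behaviour preserved */
}

int main(void)
{
        struct fault_attr attr = { .times = 2 };

        should_fail(&attr, 64);                  /* warns            */
        should_fail_ex(&attr, 64, FAULT_NOWARN); /* fails silently   */
        return 0;
}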

View file

@ -210,6 +210,20 @@ alloc_pages_bulk_array_node(gfp_t gfp, int nid, unsigned long nr_pages, struct p
return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, NULL, page_array);
}
static inline void warn_if_node_offline(int this_node, gfp_t gfp_mask)
{
gfp_t warn_gfp = gfp_mask & (__GFP_THISNODE|__GFP_NOWARN);
if (warn_gfp != (__GFP_THISNODE|__GFP_NOWARN))
return;
if (node_online(this_node))
return;
pr_warn("%pGg allocation from offline node %d\n", &gfp_mask, this_node);
dump_stack();
}
/*
* Allocate pages, preferring the node given as nid. The node must be valid and
* online. For more general interface, see alloc_pages_node().
@ -218,7 +232,7 @@ static inline struct page *
__alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
{
VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
VM_WARN_ON((gfp_mask & __GFP_THISNODE) && !node_online(nid));
warn_if_node_offline(nid, gfp_mask);
return __alloc_pages(gfp_mask, order, nid, NULL);
}
@ -227,7 +241,7 @@ static inline
struct folio *__folio_alloc_node(gfp_t gfp, unsigned int order, int nid)
{
VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
VM_WARN_ON((gfp & __GFP_THISNODE) && !node_online(nid));
warn_if_node_offline(nid, gfp);
return __folio_alloc(gfp, order, nid, NULL);
}
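The helper above only fires when __GFP_THISNODE and __GFP_NOWARN are both set, i.e. when an allocation pinned to one node would otherwise fail silently. A one-file sketch of that mask test, with made-up flag values rather than the real gfp bits:

#include <stdbool.h>
#include <stdio.h>

#define GFP_THISNODE 0x1u   /* stand-in values, not the real gfp bits */
#define GFP_NOWARN   0x2u

/* Mirrors the check in warn_if_node_offline(): both bits must be present. */
static bool wants_offline_node_warning(unsigned int gfp)
{
        unsigned int mask = GFP_THISNODE | GFP_NOWARN;

        return (gfp & mask) == mask;
}

int main(void)
{
        printf("%d %d %d\n",
               wants_offline_node_warning(GFP_THISNODE),               /* 0 */
               wants_offline_node_warning(GFP_NOWARN),                 /* 0 */
               wants_offline_node_warning(GFP_THISNODE | GFP_NOWARN)); /* 1 */
        return 0;
}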

View file

@ -638,6 +638,12 @@ static inline void mt_set_in_rcu(struct maple_tree *mt)
}
}
static inline unsigned int mt_height(const struct maple_tree *mt)
{
return (mt->ma_flags & MT_FLAGS_HEIGHT_MASK) >> MT_FLAGS_HEIGHT_OFFSET;
}
void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max);
void *mt_find_after(struct maple_tree *mt, unsigned long *index,
unsigned long max);
@ -664,6 +670,7 @@ extern atomic_t maple_tree_tests_passed;
void mt_dump(const struct maple_tree *mt);
void mt_validate(struct maple_tree *mt);
void mt_cache_shrink(void);
#define MT_BUG_ON(__tree, __x) do { \
atomic_inc(&maple_tree_tests_run); \
if (__x) { \

View file

@ -1853,6 +1853,25 @@ static void __maybe_unused show_free_areas(unsigned int flags, nodemask_t *nodem
__show_free_areas(flags, nodemask, MAX_NR_ZONES - 1);
}
/*
* Parameter block passed down to zap_pte_range in exceptional cases.
*/
struct zap_details {
struct folio *single_folio; /* Locked folio to be unmapped */
bool even_cows; /* Zap COWed private pages too? */
zap_flags_t zap_flags; /* Extra flags for zapping */
};
/*
* Whether to drop the pte markers, for example, the uffd-wp information for
* file-backed memory. This should only be specified when we will completely
* drop the page in the mm, either by truncation or unmapping of the vma. By
* default, the flag is not set.
*/
#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0))
/* Set in unmap_vmas() to indicate a final unmap call. Only used by hugetlb */
#define ZAP_FLAG_UNMAP ((__force zap_flags_t) BIT(1))
#ifdef CONFIG_MMU
extern bool can_do_mlock(void);
#else
@ -1870,6 +1889,8 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
unsigned long size);
void zap_page_range(struct vm_area_struct *vma, unsigned long address,
unsigned long size);
void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
unsigned long size, struct zap_details *details);
void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
struct vm_area_struct *start_vma, unsigned long start,
unsigned long end);
@ -3493,12 +3514,4 @@ madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
}
#endif
/*
* Whether to drop the pte markers, for example, the uffd-wp information for
* file-backed memory. This should only be specified when we will completely
* drop the page in the mm, either by truncation or unmapping of the vma. By
* default, the flag is not set.
*/
#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0))
#endif /* _LINUX_MM_H */

View file

@ -165,6 +165,13 @@ static inline pte_t *virt_to_kpte(unsigned long vaddr)
return pmd_none(*pmd) ? NULL : pte_offset_kernel(pmd, vaddr);
}
#ifndef pmd_young
static inline int pmd_young(pmd_t pmd)
{
return 0;
}
#endif
#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
extern int ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep,
@ -260,6 +267,17 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
#ifndef arch_has_hw_nonleaf_pmd_young
/*
* Return whether the accessed bit in non-leaf PMD entries is supported on the
* local CPU.
*/
static inline bool arch_has_hw_nonleaf_pmd_young(void)
{
return IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG);
}
#endif
#ifndef arch_has_hw_pte_young
/*
* Return whether the accessed bit is supported on the local CPU.

View file

@ -171,15 +171,15 @@ TRACE_EVENT(mm_collapse_huge_page_swapin,
TRACE_EVENT(mm_khugepaged_scan_file,
TP_PROTO(struct mm_struct *mm, struct page *page, const char *filename,
TP_PROTO(struct mm_struct *mm, struct page *page, struct file *file,
int present, int swap, int result),
TP_ARGS(mm, page, filename, present, swap, result),
TP_ARGS(mm, page, file, present, swap, result),
TP_STRUCT__entry(
__field(struct mm_struct *, mm)
__field(unsigned long, pfn)
__string(filename, filename)
__string(filename, file->f_path.dentry->d_iname)
__field(int, present)
__field(int, swap)
__field(int, result)
@ -188,7 +188,7 @@ TRACE_EVENT(mm_khugepaged_scan_file,
TP_fast_assign(
__entry->mm = mm;
__entry->pfn = page ? page_to_pfn(page) : -1;
__assign_str(filename, filename);
__assign_str(filename, file->f_path.dentry->d_iname);
__entry->present = present;
__entry->swap = swap;
__entry->result = result;

View file

@ -275,10 +275,8 @@ static inline void shm_rmid(struct shmid_kernel *s)
}
static int __shm_open(struct vm_area_struct *vma)
static int __shm_open(struct shm_file_data *sfd)
{
struct file *file = vma->vm_file;
struct shm_file_data *sfd = shm_file_data(file);
struct shmid_kernel *shp;
shp = shm_lock(sfd->ns, sfd->id);
@ -302,7 +300,15 @@ static int __shm_open(struct vm_area_struct *vma)
/* This is called by fork, once for every shm attach. */
static void shm_open(struct vm_area_struct *vma)
{
int err = __shm_open(vma);
struct file *file = vma->vm_file;
struct shm_file_data *sfd = shm_file_data(file);
int err;
/* Always call underlying open if present */
if (sfd->vm_ops->open)
sfd->vm_ops->open(vma);
err = __shm_open(sfd);
/*
* We raced in the idr lookup or with shm_destroy().
* Either way, the ID is busted.
@ -359,10 +365,8 @@ static bool shm_may_destroy(struct shmid_kernel *shp)
* The descriptor has already been removed from the current->mm->mmap list
* and will later be kfree()d.
*/
static void shm_close(struct vm_area_struct *vma)
static void __shm_close(struct shm_file_data *sfd)
{
struct file *file = vma->vm_file;
struct shm_file_data *sfd = shm_file_data(file);
struct shmid_kernel *shp;
struct ipc_namespace *ns = sfd->ns;
@ -388,6 +392,18 @@ static void shm_close(struct vm_area_struct *vma)
up_write(&shm_ids(ns).rwsem);
}
static void shm_close(struct vm_area_struct *vma)
{
struct file *file = vma->vm_file;
struct shm_file_data *sfd = shm_file_data(file);
/* Always call underlying close if present */
if (sfd->vm_ops->close)
sfd->vm_ops->close(vma);
__shm_close(sfd);
}
/* Called with ns->shm_ids(ns).rwsem locked */
static int shm_try_destroy_orphaned(int id, void *p, void *data)
{
@ -583,13 +599,13 @@ static int shm_mmap(struct file *file, struct vm_area_struct *vma)
* IPC ID that was removed, and possibly even reused by another shm
* segment already. Propagate this case as an error to caller.
*/
ret = __shm_open(vma);
ret = __shm_open(sfd);
if (ret)
return ret;
ret = call_mmap(sfd->file, vma);
if (ret) {
shm_close(vma);
__shm_close(sfd);
return ret;
}
sfd->vm_ops = vma->vm_ops;
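The shm_open()/shm_close() wrappers above now forward to the underlying vm_ops callback (when the backing file provides one) before doing the IPC attach/detach accounting. A toy model of that forwarding pattern, with invented types and names:

#include <stdio.h>

struct vm_ops { void (*open)(void *vma); void (*close)(void *vma); };
struct shm_file_data { const struct vm_ops *vm_ops; int nattch; };

static void backing_open(void *vma)  { (void)vma; puts("backing open");  }
static void backing_close(void *vma) { (void)vma; puts("backing close"); }

static void shm_open(struct shm_file_data *sfd, void *vma)
{
        if (sfd->vm_ops && sfd->vm_ops->open)
                sfd->vm_ops->open(vma);        /* always forward first     */
        sfd->nattch++;                         /* then our own bookkeeping */
}

static void shm_close(struct shm_file_data *sfd, void *vma)
{
        if (sfd->vm_ops && sfd->vm_ops->close)
                sfd->vm_ops->close(vma);
        sfd->nattch--;
}

int main(void)
{
        const struct vm_ops ops = { backing_open, backing_close };
        struct shm_file_data sfd = { &ops, 0 };

        shm_open(&sfd, NULL);
        shm_close(&sfd, NULL);
        printf("nattch=%d\n", sfd.nattch);
        return 0;
}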

View file

@ -280,6 +280,8 @@ void gcov_info_add(struct gcov_info *dst, struct gcov_info *src)
for (i = 0; i < sfn_ptr->num_counters; i++)
dfn_ptr->counters[i] += sfn_ptr->counters[i];
sfn_ptr = list_next_entry(sfn_ptr, head);
}
}

View file

@ -395,12 +395,13 @@ endif # DEBUG_INFO
config FRAME_WARN
int "Warn for stack frames larger than"
range 0 8192
default 0 if KMSAN
default 2048 if GCC_PLUGIN_LATENT_ENTROPY
default 2048 if PARISC
default 1536 if (!64BIT && XTENSA)
default 1280 if KASAN && !64BIT
default 1024 if !64BIT
default 2048 if 64BIT
default 0 if KMSAN
help
Tell the compiler to warn at build time for stack frames larger than this.
Setting this too low will cause a lot of warnings.
@ -2107,6 +2108,7 @@ config KPROBES_SANITY_TEST
depends on DEBUG_KERNEL
depends on KPROBES
depends on KUNIT
select STACKTRACE if ARCH_CORRECT_STACKTRACE_ON_KRETPROBE
default KUNIT_ALL_TESTS
help
This option provides for testing basic kprobes functionality on
@ -2241,6 +2243,10 @@ config TEST_UUID
config TEST_XARRAY
tristate "Test the XArray code at runtime"
config TEST_MAPLE_TREE
select DEBUG_MAPLE_TREE
tristate "Test the Maple Tree code at runtime"
config TEST_RHASHTABLE
tristate "Perform selftest on resizable hash table"
help

View file

@ -12,6 +12,7 @@ config KMSAN
bool "KMSAN: detector of uninitialized values use"
depends on HAVE_ARCH_KMSAN && HAVE_KMSAN_COMPILER
depends on SLUB && DEBUG_KERNEL && !KASAN && !KCSAN
depends on !PREEMPT_RT
select STACKDEPOT
select STACKDEPOT_ALWAYS_INIT
help

View file

@ -85,6 +85,7 @@ obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
obj-$(CONFIG_TEST_STRSCPY) += test_strscpy.o
obj-$(CONFIG_TEST_UUID) += test_uuid.o
obj-$(CONFIG_TEST_XARRAY) += test_xarray.o
obj-$(CONFIG_TEST_MAPLE_TREE) += test_maple_tree.o
obj-$(CONFIG_TEST_PARMAN) += test_parman.o
obj-$(CONFIG_TEST_KMOD) += test_kmod.o
obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o

View file

@ -41,9 +41,6 @@ EXPORT_SYMBOL_GPL(setup_fault_attr);
static void fail_dump(struct fault_attr *attr)
{
if (attr->no_warn)
return;
if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) {
printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n"
"name %pd, interval %lu, probability %lu, "
@ -103,7 +100,7 @@ static inline bool fail_stacktrace(struct fault_attr *attr)
* http://www.nongnu.org/failmalloc/
*/
bool should_fail(struct fault_attr *attr, ssize_t size)
bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags)
{
if (in_task()) {
unsigned int fail_nth = READ_ONCE(current->fail_nth);
@ -146,13 +143,19 @@ bool should_fail(struct fault_attr *attr, ssize_t size)
return false;
fail:
fail_dump(attr);
if (!(flags & FAULT_NOWARN))
fail_dump(attr);
if (atomic_read(&attr->times) != -1)
atomic_dec_not_zero(&attr->times);
return true;
}
bool should_fail(struct fault_attr *attr, ssize_t size)
{
return should_fail_ex(attr, size, 0);
}
EXPORT_SYMBOL_GPL(should_fail);
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS

View file

@ -183,10 +183,6 @@ static void ma_free_rcu(struct maple_node *node)
call_rcu(&node->rcu, mt_free_rcu);
}
static unsigned int mt_height(const struct maple_tree *mt)
{
return (mt->ma_flags & MT_FLAGS_HEIGHT_MASK) >> MT_FLAGS_HEIGHT_OFFSET;
}
static void mas_set_height(struct ma_state *mas)
{
@ -1209,7 +1205,6 @@ static inline void mas_push_node(struct ma_state *mas, struct maple_node *used)
static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
{
struct maple_alloc *node;
struct maple_alloc **nodep = &mas->alloc;
unsigned long allocated = mas_allocated(mas);
unsigned long success = allocated;
unsigned int requested = mas_alloc_req(mas);
@ -1263,8 +1258,7 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
node->node_count--;
success += count;
nodep = &node->slot[0];
node = *nodep;
node = node->slot[0];
requested -= count;
}
mas->alloc->total = success;
@ -1357,6 +1351,7 @@ static inline struct maple_enode *mas_start(struct ma_state *mas)
root = mas_root(mas);
/* Tree with nodes */
if (likely(xa_is_node(root))) {
mas->depth = 1;
mas->node = mte_safe_root(root);
return NULL;
}
@ -3608,8 +3603,7 @@ static inline int mas_commit_b_node(struct ma_wr_state *wr_mas,
node = mas_pop_node(wr_mas->mas);
node->parent = mas_mn(wr_mas->mas)->parent;
wr_mas->mas->node = mt_mk_node(node, b_type);
mab_mas_cp(b_node, 0, b_end, wr_mas->mas, true);
mab_mas_cp(b_node, 0, b_end, wr_mas->mas, false);
mas_replace(wr_mas->mas, false);
reuse_node:
mas_update_gap(wr_mas->mas);
@ -3733,7 +3727,6 @@ static bool mas_is_span_wr(struct ma_wr_state *wr_mas)
static inline void mas_wr_walk_descend(struct ma_wr_state *wr_mas)
{
wr_mas->mas->depth++;
wr_mas->type = mte_node_type(wr_mas->mas->node);
mas_wr_node_walk(wr_mas);
wr_mas->slots = ma_slots(wr_mas->node, wr_mas->type);
@ -3745,6 +3738,7 @@ static inline void mas_wr_walk_traverse(struct ma_wr_state *wr_mas)
wr_mas->mas->min = wr_mas->r_min;
wr_mas->mas->node = wr_mas->content;
wr_mas->mas->offset = 0;
wr_mas->mas->depth++;
}
/*
* mas_wr_walk() - Walk the tree for a write.
@ -4970,8 +4964,9 @@ static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size)
{
enum maple_type type = mte_node_type(mas->node);
unsigned long pivot, min, gap = 0;
unsigned char count, offset;
unsigned long *gaps = NULL, *pivots = ma_pivots(mas_mn(mas), type);
unsigned char offset;
unsigned long *gaps;
unsigned long *pivots = ma_pivots(mas_mn(mas), type);
void __rcu **slots = ma_slots(mas_mn(mas), type);
bool found = false;
@ -4982,9 +4977,8 @@ static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size)
gaps = ma_gaps(mte_to_node(mas->node), type);
offset = mas->offset;
count = mt_slots[type];
min = mas_safe_min(mas, pivots, offset);
for (; offset < count; offset++) {
for (; offset < mt_slots[type]; offset++) {
pivot = mas_safe_pivot(mas, pivots, offset, type);
if (offset && !pivot)
break;
@ -5010,8 +5004,6 @@ static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size)
mas->min = min;
mas->max = pivot;
offset = 0;
type = mte_node_type(mas->node);
count = mt_slots[type];
break;
}
}
@ -5065,6 +5057,7 @@ void *mas_walk(struct ma_state *mas)
return entry;
}
EXPORT_SYMBOL_GPL(mas_walk);
static inline bool mas_rewind_node(struct ma_state *mas)
{
@ -5276,6 +5269,7 @@ int mas_empty_area(struct ma_state *mas, unsigned long min,
mas->last = mas->index + size - 1;
return 0;
}
EXPORT_SYMBOL_GPL(mas_empty_area);
/*
* mas_empty_area_rev() - Get the highest address within the range that is
@ -5339,6 +5333,7 @@ int mas_empty_area_rev(struct ma_state *mas, unsigned long min,
mas->index = mas->last - size + 1;
return 0;
}
EXPORT_SYMBOL_GPL(mas_empty_area_rev);
static inline int mas_alloc(struct ma_state *mas, void *entry,
unsigned long size, unsigned long *index)
@ -5660,6 +5655,7 @@ void *mas_store(struct ma_state *mas, void *entry)
mas_wr_store_entry(&wr_mas);
return wr_mas.content;
}
EXPORT_SYMBOL_GPL(mas_store);
/**
* mas_store_gfp() - Store a value into the tree.
@ -5686,6 +5682,7 @@ int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp)
return 0;
}
EXPORT_SYMBOL_GPL(mas_store_gfp);
/**
* mas_store_prealloc() - Store a value into the tree using memory
@ -5703,6 +5700,7 @@ void mas_store_prealloc(struct ma_state *mas, void *entry)
BUG_ON(mas_is_err(mas));
mas_destroy(mas);
}
EXPORT_SYMBOL_GPL(mas_store_prealloc);
/**
* mas_preallocate() - Preallocate enough nodes for a store operation
@ -5772,6 +5770,7 @@ void mas_destroy(struct ma_state *mas)
}
mas->alloc = NULL;
}
EXPORT_SYMBOL_GPL(mas_destroy);
/*
* mas_expected_entries() - Set the expected number of entries that will be inserted.
@ -5833,6 +5832,7 @@ int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries)
return ret;
}
EXPORT_SYMBOL_GPL(mas_expected_entries);
/**
* mas_next() - Get the next entry.
@ -6013,6 +6013,7 @@ void *mas_find(struct ma_state *mas, unsigned long max)
/* Retries on dead nodes handled by mas_next_entry */
return mas_next_entry(mas, max);
}
EXPORT_SYMBOL_GPL(mas_find);
/**
* mas_find_rev: On the first call, find the first non-null entry at or below
@ -6059,7 +6060,7 @@ void *mas_find_rev(struct ma_state *mas, unsigned long min)
/* Retries on dead nodes handled by mas_next_entry */
return mas_prev_entry(mas, min);
}
EXPORT_SYMBOL_GPL(mas_find);
EXPORT_SYMBOL_GPL(mas_find_rev);
/**
* mas_erase() - Find the range in which index resides and erase the entire
@ -6541,8 +6542,27 @@ static inline int mas_dead_node(struct ma_state *mas, unsigned long index)
mas_rewalk(mas, index);
return 1;
}
#endif /* not defined __KERNEL__ */
void mt_cache_shrink(void)
{
}
#else
/*
* mt_cache_shrink() - For testing, don't use this.
*
* Certain testcases can trigger an OOM when combined with other memory
* debugging configuration options. This function is used to reduce the
* possibility of an out of memory even due to kmem_cache objects remaining
* around for longer than usual.
*/
void mt_cache_shrink(void)
{
kmem_cache_shrink(maple_node_cache);
}
EXPORT_SYMBOL_GPL(mt_cache_shrink);
#endif /* not defined __KERNEL__ */
/*
* mas_get_slot() - Get the entry in the maple state node stored at @offset.
* @mas: The maple state
@ -6816,6 +6836,7 @@ void mt_dump(const struct maple_tree *mt)
else if (entry)
mt_dump_node(mt, entry, 0, mt_max[mte_node_type(entry)], 0);
}
EXPORT_SYMBOL_GPL(mt_dump);
/*
* Calculate the maximum gap in a node and check if that's what is reported in
@ -7126,5 +7147,6 @@ void mt_validate(struct maple_tree *mt)
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(mt_validate);
#endif /* CONFIG_DEBUG_MAPLE_TREE */

File diff suppressed because it is too large

View file

@ -984,22 +984,6 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
goto isolate_fail;
}
/*
* Migration will fail if an anonymous page is pinned in memory,
* so avoid taking lru_lock and isolating it unnecessarily in an
* admittedly racy check.
*/
mapping = page_mapping(page);
if (!mapping && page_count(page) > page_mapcount(page))
goto isolate_fail;
/*
* Only allow to migrate anonymous pages in GFP_NOFS context
* because those do not depend on fs locks.
*/
if (!(cc->gfp_mask & __GFP_FS) && mapping)
goto isolate_fail;
/*
* Be careful not to clear PageLRU until after we're
* sure the page is not being freed elsewhere -- the
@ -1008,6 +992,22 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
if (unlikely(!get_page_unless_zero(page)))
goto isolate_fail;
/*
* Migration will fail if an anonymous page is pinned in memory,
* so avoid taking lru_lock and isolating it unnecessarily in an
* admittedly racy check.
*/
mapping = page_mapping(page);
if (!mapping && (page_count(page) - 1) > total_mapcount(page))
goto isolate_fail_put;
/*
* Only allow to migrate anonymous pages in GFP_NOFS context
* because those do not depend on fs locks.
*/
if (!(cc->gfp_mask & __GFP_FS) && mapping)
goto isolate_fail_put;
/* Only take pages on LRU: a check now makes later tests safe */
if (!PageLRU(page))
goto isolate_fail_put;

View file

@ -890,6 +890,7 @@ static ssize_t dbgfs_mk_context_write(struct file *file,
static int dbgfs_rm_context(char *name)
{
struct dentry *root, *dir, **new_dirs;
struct inode *inode;
struct damon_ctx **new_ctxs;
int i, j;
int ret = 0;
@ -905,6 +906,12 @@ static int dbgfs_rm_context(char *name)
if (!dir)
return -ENOENT;
inode = d_inode(dir);
if (!S_ISDIR(inode->i_mode)) {
ret = -EINVAL;
goto out_dput;
}
new_dirs = kmalloc_array(dbgfs_nr_ctxs - 1, sizeof(*dbgfs_dirs),
GFP_KERNEL);
if (!new_dirs) {

View file

@ -2283,12 +2283,54 @@ static struct damos *damon_sysfs_mk_scheme(
&wmarks);
}
static void damon_sysfs_update_scheme(struct damos *scheme,
struct damon_sysfs_scheme *sysfs_scheme)
{
struct damon_sysfs_access_pattern *access_pattern =
sysfs_scheme->access_pattern;
struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas;
struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights;
struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks;
scheme->pattern.min_sz_region = access_pattern->sz->min;
scheme->pattern.max_sz_region = access_pattern->sz->max;
scheme->pattern.min_nr_accesses = access_pattern->nr_accesses->min;
scheme->pattern.max_nr_accesses = access_pattern->nr_accesses->max;
scheme->pattern.min_age_region = access_pattern->age->min;
scheme->pattern.max_age_region = access_pattern->age->max;
scheme->action = sysfs_scheme->action;
scheme->quota.ms = sysfs_quotas->ms;
scheme->quota.sz = sysfs_quotas->sz;
scheme->quota.reset_interval = sysfs_quotas->reset_interval_ms;
scheme->quota.weight_sz = sysfs_weights->sz;
scheme->quota.weight_nr_accesses = sysfs_weights->nr_accesses;
scheme->quota.weight_age = sysfs_weights->age;
scheme->wmarks.metric = sysfs_wmarks->metric;
scheme->wmarks.interval = sysfs_wmarks->interval_us;
scheme->wmarks.high = sysfs_wmarks->high;
scheme->wmarks.mid = sysfs_wmarks->mid;
scheme->wmarks.low = sysfs_wmarks->low;
}
static int damon_sysfs_set_schemes(struct damon_ctx *ctx,
struct damon_sysfs_schemes *sysfs_schemes)
{
int i;
struct damos *scheme, *next;
int i = 0;
for (i = 0; i < sysfs_schemes->nr; i++) {
damon_for_each_scheme_safe(scheme, next, ctx) {
if (i < sysfs_schemes->nr)
damon_sysfs_update_scheme(scheme,
sysfs_schemes->schemes_arr[i]);
else
damon_destroy_scheme(scheme);
i++;
}
for (; i < sysfs_schemes->nr; i++) {
struct damos *scheme, *next;
scheme = damon_sysfs_mk_scheme(sysfs_schemes->schemes_arr[i]);
@ -2339,6 +2381,10 @@ static int damon_sysfs_upd_schemes_stats(struct damon_sysfs_kdamond *kdamond)
damon_for_each_scheme(scheme, ctx) {
struct damon_sysfs_stats *sysfs_stats;
/* user could have removed the scheme sysfs dir */
if (schemes_idx >= sysfs_schemes->nr)
break;
sysfs_stats = sysfs_schemes->schemes_arr[schemes_idx++]->stats;
sysfs_stats->nr_tried = scheme->stat.nr_tried;
sysfs_stats->sz_tried = scheme->stat.sz_tried;

View file

@ -16,6 +16,8 @@ static struct {
bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
{
int flags = 0;
/* No fault-injection for bootstrap cache */
if (unlikely(s == kmem_cache))
return false;
@ -30,10 +32,16 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB))
return false;
/*
* In some cases, it expects to specify __GFP_NOWARN
* to avoid printing any information(not just a warning),
* thus avoiding deadlocks. See commit 6b9dbedbe349 for
* details.
*/
if (gfpflags & __GFP_NOWARN)
failslab.attr.no_warn = true;
flags |= FAULT_NOWARN;
return should_fail(&failslab.attr, s->object_size);
return should_fail_ex(&failslab.attr, s->object_size, flags);
}
static int __init setup_failslab(char *str)

View file

@ -2206,9 +2206,12 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
entry = pte_wrprotect(entry);
if (!young)
entry = pte_mkold(entry);
/* NOTE: this may set soft-dirty too on some archs */
if (dirty)
entry = pte_mkdirty(entry);
/*
* NOTE: we don't do pte_mkdirty when dirty==true
* because it breaks sparc64 which can sigsegv
* random process. Need to revisit when we figure
* out what is special with sparc64.
*/
if (soft_dirty)
entry = pte_mksoft_dirty(entry);
if (uffd_wp)

View file

@ -1800,6 +1800,7 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order,
/* we rely on prep_new_huge_page to set the destructor */
set_compound_order(page, order);
__ClearPageReserved(page);
__SetPageHead(page);
for (i = 0; i < nr_pages; i++) {
p = nth_page(page, i);
@ -1816,7 +1817,8 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order,
* on the head page when they need know if put_page() is needed
* after get_user_pages().
*/
__ClearPageReserved(p);
if (i != 0) /* head page cleared above */
__ClearPageReserved(p);
/*
* Subtle and very unlikely
*
@ -5199,17 +5201,22 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb,
__unmap_hugepage_range(tlb, vma, start, end, ref_page, zap_flags);
/*
* Unlock and free the vma lock before releasing i_mmap_rwsem. When
* the vma_lock is freed, this makes the vma ineligible for pmd
* sharing. And, i_mmap_rwsem is required to set up pmd sharing.
* This is important as page tables for this unmapped range will
* be asynchrously deleted. If the page tables are shared, there
* will be issues when accessed by someone else.
*/
__hugetlb_vma_unlock_write_free(vma);
i_mmap_unlock_write(vma->vm_file->f_mapping);
if (zap_flags & ZAP_FLAG_UNMAP) { /* final unmap */
/*
* Unlock and free the vma lock before releasing i_mmap_rwsem.
* When the vma_lock is freed, this makes the vma ineligible
* for pmd sharing. And, i_mmap_rwsem is required to set up
* pmd sharing. This is important as page tables for this
* unmapped range will be asynchrously deleted. If the page
* tables are shared, there will be issues when accessed by
* someone else.
*/
__hugetlb_vma_unlock_write_free(vma);
i_mmap_unlock_write(vma->vm_file->f_mapping);
} else {
i_mmap_unlock_write(vma->vm_file->f_mapping);
hugetlb_vma_unlock_write(vma);
}
}
void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
@ -6103,6 +6110,10 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
ptl = huge_pte_lock(h, dst_mm, dst_pte);
ret = -EIO;
if (PageHWPoison(page))
goto out_release_unlock;
/*
* We allow to overwrite a pte marker: consider when both MISSING|WP
* registered, we firstly wr-protect a none pte which has no page cache

View file

@ -11,6 +11,7 @@
#define pr_fmt(fmt) "HugeTLB: " fmt
#include <linux/pgtable.h>
#include <linux/moduleparam.h>
#include <linux/bootmem_info.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>

View file

@ -75,18 +75,23 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries
if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfence_") ||
str_has_prefix(buf, ARCH_FUNC_PREFIX "__kfence_") ||
str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmem_cache_free") ||
!strncmp(buf, ARCH_FUNC_PREFIX "__slab_free", len)) {
/*
* In case of tail calls from any of the below
* to any of the above.
* In case of tail calls from any of the below to any of
* the above, optimized by the compiler such that the
* stack trace would omit the initial entry point below.
*/
fallback = skipnr + 1;
}
/* Also the *_bulk() variants by only checking prefixes. */
/*
* The below list should only include the initial entry points
* into the slab allocators. Includes the *_bulk() variants by
* checking prefixes.
*/
if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfree") ||
str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_free") ||
str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmem_cache_free") ||
str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmalloc") ||
str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_alloc"))
goto found;

View file

@ -97,8 +97,8 @@ struct collapse_control {
/* Num pages scanned per node */
u32 node_load[MAX_NUMNODES];
/* Last target selected in hpage_collapse_find_target_node() */
int last_target_node;
/* nodemask for allocation fallback */
nodemask_t alloc_nmask;
};
/**
@ -734,7 +734,6 @@ static void khugepaged_alloc_sleep(void)
struct collapse_control khugepaged_collapse_control = {
.is_khugepaged = true,
.last_target_node = NUMA_NO_NODE,
};
static bool hpage_collapse_scan_abort(int nid, struct collapse_control *cc)
@ -783,16 +782,11 @@ static int hpage_collapse_find_target_node(struct collapse_control *cc)
target_node = nid;
}
/* do some balance if several nodes have the same hit record */
if (target_node <= cc->last_target_node)
for (nid = cc->last_target_node + 1; nid < MAX_NUMNODES;
nid++)
if (max_value == cc->node_load[nid]) {
target_node = nid;
break;
}
for_each_online_node(nid) {
if (max_value == cc->node_load[nid])
node_set(nid, cc->alloc_nmask);
}
cc->last_target_node = target_node;
return target_node;
}
#else
@ -802,9 +796,10 @@ static int hpage_collapse_find_target_node(struct collapse_control *cc)
}
#endif
static bool hpage_collapse_alloc_page(struct page **hpage, gfp_t gfp, int node)
static bool hpage_collapse_alloc_page(struct page **hpage, gfp_t gfp, int node,
nodemask_t *nmask)
{
*hpage = __alloc_pages_node(node, gfp, HPAGE_PMD_ORDER);
*hpage = __alloc_pages(gfp, HPAGE_PMD_ORDER, node, nmask);
if (unlikely(!*hpage)) {
count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
return false;
@ -955,12 +950,11 @@ static int __collapse_huge_page_swapin(struct mm_struct *mm,
static int alloc_charge_hpage(struct page **hpage, struct mm_struct *mm,
struct collapse_control *cc)
{
/* Only allocate from the target node */
gfp_t gfp = (cc->is_khugepaged ? alloc_hugepage_khugepaged_gfpmask() :
GFP_TRANSHUGE) | __GFP_THISNODE;
GFP_TRANSHUGE);
int node = hpage_collapse_find_target_node(cc);
if (!hpage_collapse_alloc_page(hpage, gfp, node))
if (!hpage_collapse_alloc_page(hpage, gfp, node, &cc->alloc_nmask))
return SCAN_ALLOC_HUGE_PAGE_FAIL;
if (unlikely(mem_cgroup_charge(page_folio(*hpage), mm, gfp)))
return SCAN_CGROUP_CHARGE_FAIL;
@ -1057,6 +1051,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
_pmd = pmdp_collapse_flush(vma, address, pmd);
spin_unlock(pmd_ptl);
mmu_notifier_invalidate_range_end(&range);
tlb_remove_table_sync_one();
spin_lock(pte_ptl);
result = __collapse_huge_page_isolate(vma, address, pte, cc,
@ -1144,6 +1139,7 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
goto out;
memset(cc->node_load, 0, sizeof(cc->node_load));
nodes_clear(cc->alloc_nmask);
pte = pte_offset_map_lock(mm, pmd, address, &ptl);
for (_address = address, _pte = pte; _pte < pte + HPAGE_PMD_NR;
_pte++, _address += PAGE_SIZE) {
@ -1384,16 +1380,43 @@ static int set_huge_pmd(struct vm_area_struct *vma, unsigned long addr,
return SCAN_SUCCEED;
}
/*
* A note about locking:
* Trying to take the page table spinlocks would be useless here because those
* are only used to synchronize:
*
* - modifying terminal entries (ones that point to a data page, not to another
* page table)
* - installing *new* non-terminal entries
*
* Instead, we need roughly the same kind of protection as free_pgtables() or
* mm_take_all_locks() (but only for a single VMA):
* The mmap lock together with this VMA's rmap locks covers all paths towards
* the page table entries we're messing with here, except for hardware page
* table walks and lockless_pages_from_mm().
*/
static void collapse_and_free_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
spinlock_t *ptl;
pmd_t pmd;
struct mmu_notifier_range range;
mmap_assert_write_locked(mm);
ptl = pmd_lock(vma->vm_mm, pmdp);
if (vma->vm_file)
lockdep_assert_held_write(&vma->vm_file->f_mapping->i_mmap_rwsem);
/*
* All anon_vmas attached to the VMA have the same root and are
* therefore locked by the same lock.
*/
if (vma->anon_vma)
lockdep_assert_held_write(&vma->anon_vma->root->rwsem);
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm, addr,
addr + HPAGE_PMD_SIZE);
mmu_notifier_invalidate_range_start(&range);
pmd = pmdp_collapse_flush(vma, addr, pmdp);
spin_unlock(ptl);
tlb_remove_table_sync_one();
mmu_notifier_invalidate_range_end(&range);
mm_dec_nr_ptes(mm);
page_table_check_pte_clear_range(mm, addr, pmd);
pte_free(mm, pmd_pgtable(pmd));
@ -1444,6 +1467,14 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
if (!hugepage_vma_check(vma, vma->vm_flags, false, false, false))
return SCAN_VMA_CHECK;
/*
* Symmetry with retract_page_tables(): Exclude MAP_PRIVATE mappings
* that got written to. Without this, we'd have to also lock the
* anon_vma if one exists.
*/
if (vma->anon_vma)
return SCAN_VMA_CHECK;
/* Keep pmd pgtable for uffd-wp; see comment in retract_page_tables() */
if (userfaultfd_wp(vma))
return SCAN_PTE_UFFD_WP;
@ -1477,6 +1508,20 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
goto drop_hpage;
}
/*
* We need to lock the mapping so that from here on, only GUP-fast and
* hardware page walks can access the parts of the page tables that
* we're operating on.
* See collapse_and_free_pmd().
*/
i_mmap_lock_write(vma->vm_file->f_mapping);
/*
* This spinlock should be unnecessary: Nobody else should be accessing
* the page tables under spinlock protection here, only
* lockless_pages_from_mm() and the hardware page walker can access page
* tables while all the high-level locks are held in write mode.
*/
start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl);
result = SCAN_FAIL;
@ -1531,6 +1576,8 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
/* step 4: remove pte entries */
collapse_and_free_pmd(mm, vma, haddr, pmd);
i_mmap_unlock_write(vma->vm_file->f_mapping);
maybe_install_pmd:
/* step 5: install pmd entry */
result = install_pmd
@ -1544,6 +1591,7 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
abort:
pte_unmap_unlock(start_pte, ptl);
i_mmap_unlock_write(vma->vm_file->f_mapping);
goto drop_hpage;
}
@ -1600,7 +1648,8 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff,
* An alternative would be drop the check, but check that page
* table is clear before calling pmdp_collapse_flush() under
* ptl. It has higher chance to recover THP for the VMA, but
* has higher cost too.
* has higher cost too. It would also probably require locking
* the anon_vma.
*/
if (vma->anon_vma) {
result = SCAN_PAGE_ANON;
@ -2077,6 +2126,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
present = 0;
swap = 0;
memset(cc->node_load, 0, sizeof(cc->node_load));
nodes_clear(cc->alloc_nmask);
rcu_read_lock();
xas_for_each(&xas, page, start + HPAGE_PMD_NR - 1) {
if (xas_retry(&xas, page))
@ -2157,8 +2207,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
}
}
trace_mm_khugepaged_scan_file(mm, page, file->f_path.dentry->d_iname,
present, swap, result);
trace_mm_khugepaged_scan_file(mm, page, file, present, swap, result);
return result;
}
#else
@ -2576,7 +2625,6 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev,
if (!cc)
return -ENOMEM;
cc->is_khugepaged = false;
cc->last_target_node = NUMA_NO_NODE;
mmgrab(mm);
lru_add_drain_all();
@ -2602,6 +2650,7 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev,
}
mmap_assert_locked(mm);
memset(cc->node_load, 0, sizeof(cc->node_load));
nodes_clear(cc->alloc_nmask);
if (IS_ENABLED(CONFIG_SHMEM) && vma->vm_file) {
struct file *file = get_file(vma->vm_file);
pgoff_t pgoff = linear_page_index(vma, addr);

View file

@ -124,6 +124,8 @@ static __always_inline bool kmsan_in_runtime(void)
{
if ((hardirq_count() >> HARDIRQ_SHIFT) > 1)
return true;
if (in_nmi())
return true;
return kmsan_get_context()->kmsan_in_runtime;
}

View file

@ -772,8 +772,8 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
* Application no longer needs these pages. If the pages are dirty,
* it's OK to just throw them away. The app will be more careful about
* data it wants to keep. Be sure to free swap resources too. The
* zap_page_range call sets things up for shrink_active_list to actually free
* these pages later if no one else has touched them in the meantime,
* zap_page_range_single call sets things up for shrink_active_list to actually
* free these pages later if no one else has touched them in the meantime,
* although we could add these pages to a global reuse list for
* shrink_active_list to pick up before reclaiming other pages.
*
@ -790,7 +790,7 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
static long madvise_dontneed_single_vma(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
zap_page_range(vma, start, end - start);
zap_page_range_single(vma, start, end - start, NULL);
return 0;
}

View file

@ -3026,7 +3026,7 @@ struct obj_cgroup *get_obj_cgroup_from_page(struct page *page)
{
struct obj_cgroup *objcg;
if (!memcg_kmem_enabled() || memcg_kmem_bypass())
if (!memcg_kmem_enabled())
return NULL;
if (PageMemcgKmem(page)) {

View file

@ -1093,6 +1093,7 @@ static int me_huge_page(struct page_state *ps, struct page *p)
int res;
struct page *hpage = compound_head(p);
struct address_space *mapping;
bool extra_pins = false;
if (!PageHuge(hpage))
return MF_DELAYED;
@ -1100,6 +1101,8 @@ static int me_huge_page(struct page_state *ps, struct page *p)
mapping = page_mapping(hpage);
if (mapping) {
res = truncate_error_page(hpage, page_to_pfn(p), mapping);
/* The page is kept in page cache. */
extra_pins = true;
unlock_page(hpage);
} else {
unlock_page(hpage);
@ -1117,7 +1120,7 @@ static int me_huge_page(struct page_state *ps, struct page *p)
}
}
if (has_extra_refcount(ps, p, false))
if (has_extra_refcount(ps, p, extra_pins))
res = MF_FAILED;
return res;

View file

@ -1341,15 +1341,6 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
return ret;
}
/*
* Parameter block passed down to zap_pte_range in exceptional cases.
*/
struct zap_details {
struct folio *single_folio; /* Locked folio to be unmapped */
bool even_cows; /* Zap COWed private pages too? */
zap_flags_t zap_flags; /* Extra flags for zapping */
};
/* Whether we should zap all COWed (private) pages too */
static inline bool should_zap_cows(struct zap_details *details)
{
@ -1718,7 +1709,7 @@ void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
{
struct mmu_notifier_range range;
struct zap_details details = {
.zap_flags = ZAP_FLAG_DROP_MARKER,
.zap_flags = ZAP_FLAG_DROP_MARKER | ZAP_FLAG_UNMAP,
/* Careful - we need to zap private pages too! */
.even_cows = true,
};
@ -1772,19 +1763,27 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start,
*
* The range must fit into one VMA.
*/
static void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
unsigned long size, struct zap_details *details)
{
const unsigned long end = address + size;
struct mmu_notifier_range range;
struct mmu_gather tlb;
lru_add_drain();
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
address, address + size);
address, end);
if (is_vm_hugetlb_page(vma))
adjust_range_if_pmd_sharing_possible(vma, &range.start,
&range.end);
tlb_gather_mmu(&tlb, vma->vm_mm);
update_hiwater_rss(vma->vm_mm);
mmu_notifier_invalidate_range_start(&range);
unmap_single_vma(&tlb, vma, address, range.end, details);
/*
* unmap 'address-end' not 'range.start-range.end' as range
* could have been expanded for hugetlb pmd sharing.
*/
unmap_single_vma(&tlb, vma, address, end, details);
mmu_notifier_invalidate_range_end(&range);
tlb_finish_mmu(&tlb);
}
@ -3761,7 +3760,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
*/
get_page(vmf->page);
pte_unmap_unlock(vmf->pte, vmf->ptl);
vmf->page->pgmap->ops->migrate_to_ram(vmf);
ret = vmf->page->pgmap->ops->migrate_to_ram(vmf);
put_page(vmf->page);
} else if (is_hwpoison_entry(entry)) {
ret = VM_FAULT_HWPOISON;

View file

@ -335,6 +335,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
WARN(1, "File system DAX not supported\n");
return ERR_PTR(-EINVAL);
}
params.pgprot = pgprot_decrypted(params.pgprot);
break;
case MEMORY_DEVICE_GENERIC:
break;

View file

@ -357,7 +357,8 @@ static bool migrate_vma_check_page(struct page *page, struct page *fault_page)
}
/*
* Unmaps pages for migration. Returns number of unmapped pages.
* Unmaps pages for migration. Returns number of source pfns marked as
* migrating.
*/
static unsigned long migrate_device_unmap(unsigned long *src_pfns,
unsigned long npages,
@ -373,8 +374,11 @@ static unsigned long migrate_device_unmap(unsigned long *src_pfns,
struct page *page = migrate_pfn_to_page(src_pfns[i]);
struct folio *folio;
if (!page)
if (!page) {
if (src_pfns[i] & MIGRATE_PFN_MIGRATE)
unmapped++;
continue;
}
/* ZONE_DEVICE pages are not on LRU */
if (!is_zone_device_page(page)) {

View file

@ -456,7 +456,7 @@ void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas)
* vma_mas_szero() - Set a given range to zero. Used when modifying a
* vm_area_struct start or end.
*
* @mm: The struct_mm
* @mas: The maple tree ma_state
* @start: The start address to zero
* @end: The end address to zero.
*/
@ -2674,6 +2674,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
error = -EINVAL;
if (file)
goto close_and_free_vma;
else if (vma->vm_file)
goto unmap_and_free_vma;
else
goto free_vma;
}
@ -2682,6 +2684,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
error = -ENOMEM;
if (file)
goto close_and_free_vma;
else if (vma->vm_file)
goto unmap_and_free_vma;
else
goto free_vma;
}
@ -2751,7 +2755,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
/* Undo any partial mapping done by a device driver. */
unmap_region(mm, mas.tree, vma, prev, next, vma->vm_start, vma->vm_end);
if (vm_flags & VM_SHARED)
if (file && (vm_flags & VM_SHARED))
mapping_unmap_writable(file->f_mapping);
free_vma:
vm_area_free(vma);

View file

@ -153,7 +153,7 @@ static void tlb_remove_table_smp_sync(void *arg)
/* Simply deliver the interrupt */
}
static void tlb_remove_table_sync_one(void)
void tlb_remove_table_sync_one(void)
{
/*
* This isn't an RCU grace period and hence the page-tables cannot be
@ -177,8 +177,6 @@ static void tlb_remove_table_free(struct mmu_table_batch *batch)
#else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */
static void tlb_remove_table_sync_one(void) { }
static void tlb_remove_table_free(struct mmu_table_batch *batch)
{
__tlb_remove_table_free(batch);

View file

@ -3887,6 +3887,8 @@ __setup("fail_page_alloc=", setup_fail_page_alloc);
static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
{
int flags = 0;
if (order < fail_page_alloc.min_order)
return false;
if (gfp_mask & __GFP_NOFAIL)
@ -3897,10 +3899,11 @@ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
(gfp_mask & __GFP_DIRECT_RECLAIM))
return false;
/* See comment in __should_failslab() */
if (gfp_mask & __GFP_NOWARN)
fail_page_alloc.attr.no_warn = true;
flags |= FAULT_NOWARN;
return should_fail(&fail_page_alloc.attr, 1 << order);
return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags);
}
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS

View file

@ -166,7 +166,7 @@ struct page_ext *page_ext_get(struct page *page)
/**
* page_ext_put() - Working with page extended information is done.
* @page_ext - Page extended information received from page_ext_get().
* @page_ext: Page extended information received from page_ext_get().
*
* The page extended information of the page may not be valid after this
* function is called.

View file

@ -973,23 +973,23 @@ static int scan_swap_map_slots(struct swap_info_struct *si,
scan:
spin_unlock(&si->lock);
while (++offset <= READ_ONCE(si->highest_bit)) {
if (swap_offset_available_and_locked(si, offset))
goto checks;
if (unlikely(--latency_ration < 0)) {
cond_resched();
latency_ration = LATENCY_LIMIT;
scanned_many = true;
}
if (swap_offset_available_and_locked(si, offset))
goto checks;
}
offset = si->lowest_bit;
while (offset < scan_base) {
if (swap_offset_available_and_locked(si, offset))
goto checks;
if (unlikely(--latency_ration < 0)) {
cond_resched();
latency_ration = LATENCY_LIMIT;
scanned_many = true;
}
if (swap_offset_available_and_locked(si, offset))
goto checks;
offset++;
}
spin_lock(&si->lock);

View file

@ -64,7 +64,7 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
pte_t _dst_pte, *dst_pte;
bool writable = dst_vma->vm_flags & VM_WRITE;
bool vm_shared = dst_vma->vm_flags & VM_SHARED;
bool page_in_cache = page->mapping;
bool page_in_cache = page_mapping(page);
spinlock_t *ptl;
struct inode *inode;
pgoff_t offset, max_off;

View file

@ -2514,8 +2514,20 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
* the flushers simply cannot keep up with the allocation
* rate. Nudge the flusher threads in case they are asleep.
*/
if (stat.nr_unqueued_dirty == nr_taken)
if (stat.nr_unqueued_dirty == nr_taken) {
wakeup_flusher_threads(WB_REASON_VMSCAN);
/*
* For cgroupv1, dirty throttling is achieved by waking up
* the kernel flusher here and later waiting on folios
* which are in writeback to finish (see shrink_folio_list()).
*
* The flusher may not be able to issue writeback quickly
* enough for cgroupv1 writeback throttling to work
* on a large system.
*/
if (!writeback_throttling_sane(sc))
reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK);
}
sc->nr.dirty += stat.nr_dirty;
sc->nr.congested += stat.nr_congested;
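The new branch keys off writeback_throttling_sane(), which is not shown in this excerpt. Treat the following as an assumption rather than the exact mm/vmscan.c wording: the helper reports whether the reclaim context already gets proper writeback throttling (global reclaim, or cgroup v2 with per-cgroup writeback), and it is the cgroup v1 case, where it returns false, that triggers the explicit reclaim_throttle() stall added above:

/* Assumed shape of the helper; the real definition lives in mm/vmscan.c. */
static bool writeback_throttling_sane(struct scan_control *sc)
{
	if (!cgroup_reclaim(sc))	/* global reclaim is throttled elsewhere */
		return true;
#ifdef CONFIG_CGROUP_WRITEBACK
	if (cgroup_subsys_on_dfl(memory_cgrp_subsys))	/* cgroup v2 */
		return true;
#endif
	return false;			/* cgroup v1: throttle in the reclaimer */
}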
@ -3977,7 +3989,7 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area
goto next;
if (!pmd_trans_huge(pmd[i])) {
if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) &&
if (arch_has_hw_nonleaf_pmd_young() &&
get_cap(LRU_GEN_NONLEAF_YOUNG))
pmdp_test_and_clear_young(vma, addr, pmd + i);
goto next;
@ -4075,14 +4087,14 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
#endif
walk->mm_stats[MM_NONLEAF_TOTAL]++;
#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
if (get_cap(LRU_GEN_NONLEAF_YOUNG)) {
if (arch_has_hw_nonleaf_pmd_young() &&
get_cap(LRU_GEN_NONLEAF_YOUNG)) {
if (!pmd_young(val))
continue;
walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos);
}
#endif
if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i))
continue;
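These hunks (together with the show_enabled() hunk further down) replace the compile-time IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) checks with the runtime helper arch_has_hw_nonleaf_pmd_young(), letting an architecture declare at run time that non-leaf PMD accessed bits cannot be used even though the Kconfig option is built in. A sketch of what the generic fallback presumably looks like; the include/linux/pgtable.h side is not part of this excerpt:

/* Assumed generic fallback; architectures may override it. */
#ifndef arch_has_hw_nonleaf_pmd_young
static inline bool arch_has_hw_nonleaf_pmd_young(void)
{
	/* Without an override, keep the old compile-time behaviour. */
	return IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG);
}
#endif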
@ -4973,10 +4985,13 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
int scanned;
int reclaimed;
LIST_HEAD(list);
LIST_HEAD(clean);
struct folio *folio;
struct folio *next;
enum vm_event_item item;
struct reclaim_stat stat;
struct lru_gen_mm_walk *walk;
bool skip_retry = false;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
@ -4993,20 +5008,37 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
if (list_empty(&list))
return scanned;
retry:
reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false);
sc->nr_reclaimed += reclaimed;
list_for_each_entry(folio, &list, lru) {
/* restore LRU_REFS_FLAGS cleared by isolate_folio() */
if (folio_test_workingset(folio))
folio_set_referenced(folio);
list_for_each_entry_safe_reverse(folio, next, &list, lru) {
if (!folio_evictable(folio)) {
list_del(&folio->lru);
folio_putback_lru(folio);
continue;
}
/* don't add rejected pages to the oldest generation */
if (folio_test_reclaim(folio) &&
(folio_test_dirty(folio) || folio_test_writeback(folio)))
folio_clear_active(folio);
else
folio_set_active(folio);
(folio_test_dirty(folio) || folio_test_writeback(folio))) {
/* restore LRU_REFS_FLAGS cleared by isolate_folio() */
if (folio_test_workingset(folio))
folio_set_referenced(folio);
continue;
}
if (skip_retry || folio_test_active(folio) || folio_test_referenced(folio) ||
folio_mapped(folio) || folio_test_locked(folio) ||
folio_test_dirty(folio) || folio_test_writeback(folio)) {
/* don't add rejected folios to the oldest generation */
set_mask_bits(&folio->flags, LRU_REFS_MASK | LRU_REFS_FLAGS,
BIT(PG_active));
continue;
}
/* retry folios that may have missed folio_rotate_reclaimable() */
list_move(&folio->lru, &clean);
sc->nr_scanned -= folio_nr_pages(folio);
}
spin_lock_irq(&lruvec->lru_lock);
@ -5028,7 +5060,13 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
mem_cgroup_uncharge_list(&list);
free_unref_page_list(&list);
sc->nr_reclaimed += reclaimed;
INIT_LIST_HEAD(&list);
list_splice_init(&clean, &list);
if (!list_empty(&list)) {
skip_retry = true;
goto retry;
}
if (need_swapping && type == LRU_GEN_ANON)
*need_swapping = true;
@ -5356,7 +5394,7 @@ static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, c
if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))
caps |= BIT(LRU_GEN_MM_WALK);
if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && get_cap(LRU_GEN_NONLEAF_YOUNG))
if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG))
caps |= BIT(LRU_GEN_NONLEAF_YOUNG);
return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps);
@ -5846,8 +5884,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
enum lru_list lru;
unsigned long nr_reclaimed = 0;
unsigned long nr_to_reclaim = sc->nr_to_reclaim;
bool proportional_reclaim;
struct blk_plug plug;
bool scan_adjusted;
if (lru_gen_enabled()) {
lru_gen_shrink_lruvec(lruvec, sc);
@ -5870,8 +5908,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
* abort proportional reclaim if either the file or anon lru has already
* dropped to zero at the first pass.
*/
scan_adjusted = (!cgroup_reclaim(sc) && !current_is_kswapd() &&
sc->priority == DEF_PRIORITY);
proportional_reclaim = (!cgroup_reclaim(sc) && !current_is_kswapd() &&
sc->priority == DEF_PRIORITY);
blk_start_plug(&plug);
while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
@ -5891,7 +5929,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
cond_resched();
if (nr_reclaimed < nr_to_reclaim || scan_adjusted)
if (nr_reclaimed < nr_to_reclaim || proportional_reclaim)
continue;
/*
@ -5942,8 +5980,6 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
nr_scanned = targets[lru] - nr[lru];
nr[lru] = targets[lru] * (100 - percentage) / 100;
nr[lru] -= min(nr[lru], nr_scanned);
scan_adjusted = true;
}
blk_finish_plug(&plug);
sc->nr_reclaimed += nr_reclaimed;

View file

@ -1,4 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
generated/bit-length.h
generated/map-shift.h
idr.c
idr-test

View file

@ -18,9 +18,14 @@ endif
ifeq ($(BUILD), 32)
CFLAGS += -m32
LDFLAGS += -m32
LONG_BIT := 32
endif
targets: generated/map-shift.h $(TARGETS)
ifndef LONG_BIT
LONG_BIT := $(shell getconf LONG_BIT)
endif
targets: generated/map-shift.h generated/bit-length.h $(TARGETS)
main: $(OFILES)
@ -34,11 +39,11 @@ maple: $(CORE_OFILES)
multiorder: multiorder.o $(CORE_OFILES)
clean:
$(RM) $(TARGETS) *.o radix-tree.c idr.c generated/map-shift.h
$(RM) $(TARGETS) *.o radix-tree.c idr.c generated/map-shift.h generated/bit-length.h
vpath %.c ../../lib
$(OFILES): Makefile *.h */*.h generated/map-shift.h \
$(OFILES): Makefile *.h */*.h generated/map-shift.h generated/bit-length.h \
../../include/linux/*.h \
../../include/asm/*.h \
../../../include/linux/xarray.h \
@ -61,3 +66,11 @@ generated/map-shift.h:
echo "#define XA_CHUNK_SHIFT $(SHIFT)" > \
generated/map-shift.h; \
fi
generated/bit-length.h: FORCE
@if ! grep -qws CONFIG_$(LONG_BIT)BIT generated/bit-length.h; then \
echo "Generating $@"; \
echo "#define CONFIG_$(LONG_BIT)BIT 1" > $@; \
fi
FORCE: ;
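The FORCE-driven rule above regenerates generated/bit-length.h whenever LONG_BIT changes, so 32-bit (BUILD=32) and 64-bit builds of these selftests no longer share a stale header. On a typical 64-bit host, where getconf LONG_BIT prints 64, the generated file would contain a single line along these lines, and the xarray config in the next hunk then pulls it in via #include "bit-length.h" instead of hard-coding CONFIG_64BIT:

#define CONFIG_64BIT 1	/* or CONFIG_32BIT 1 for a BUILD=32 run */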

View file

@ -1,2 +1,2 @@
#include "bit-length.h"
#define CONFIG_XARRAY_MULTI 1
#define CONFIG_64BIT 1

View file

@ -129,6 +129,10 @@ void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list)
pthread_mutex_unlock(&cachep->lock);
}
void kmem_cache_shrink(struct kmem_cache *cachep)
{
}
int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size,
void **p)
{

File diff suppressed because it is too large

View file

@ -150,7 +150,7 @@ do_preprocess()
let lines=3
out=`basename "$in"`"-slabs-by-loss"
`cat "$in" | grep -A "$lines" 'Slabs sorted by loss' |\
egrep -iv '\-\-|Name|Slabs'\
grep -E -iv '\-\-|Name|Slabs'\
| awk '{print $1" "$4+$2*$3" "$4}' > "$out"`
if [ $? -eq 0 ]; then
do_slabs_plotting "$out"
@ -159,7 +159,7 @@ do_preprocess()
let lines=3
out=`basename "$in"`"-slabs-by-size"
`cat "$in" | grep -A "$lines" 'Slabs sorted by size' |\
egrep -iv '\-\-|Name|Slabs'\
grep -E -iv '\-\-|Name|Slabs'\
| awk '{print $1" "$4" "$4-$2*$3}' > "$out"`
if [ $? -eq 0 ]; then
do_slabs_plotting "$out"