From 6f0df8e16eb543167f2929cb756e695709a3551d Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 23 Aug 2023 15:54:30 -0700 Subject: [PATCH 1/7] memcontrol: ensure memcg acquired by id is properly set up In the eviction recency check, we attempt to retrieve the memcg to which the folio belonged when it was evicted, by the memcg id stored in the shadow entry. However, there is a chance that the retrieved memcg is not the original memcg that has been killed, but a new one which happens to have the same id. This is a somewhat unfortunate, but acceptable and rare inaccuracy in the heuristics. However, if we retrieve this new memcg between its allocation and when it is properly attached to the memcg hierarchy, we could run into the following NULL pointer exception during the memcg hierarchy traversal done in mem_cgroup_get_nr_swap_pages(): [ 155757.793456] BUG: kernel NULL pointer dereference, address: 00000000000000c0 [ 155757.807568] #PF: supervisor read access in kernel mode [ 155757.818024] #PF: error_code(0x0000) - not-present page [ 155757.828482] PGD 401f77067 P4D 401f77067 PUD 401f76067 PMD 0 [ 155757.839985] Oops: 0000 [#1] SMP [ 155757.887870] RIP: 0010:mem_cgroup_get_nr_swap_pages+0x3d/0xb0 [ 155757.899377] Code: 29 19 4a 02 48 39 f9 74 63 48 8b 97 c0 00 00 00 48 8b b7 58 02 00 00 48 2b b7 c0 01 00 00 48 39 f0 48 0f 4d c6 48 39 d1 74 42 <48> 8b b2 c0 00 00 00 48 8b ba 58 02 00 00 48 2b ba c0 01 00 00 48 [ 155757.937125] RSP: 0018:ffffc9002ecdfbc8 EFLAGS: 00010286 [ 155757.947755] RAX: 00000000003a3b1c RBX: 000007ffffffffff RCX: ffff888280183000 [ 155757.962202] RDX: 0000000000000000 RSI: 0007ffffffffffff RDI: ffff888bbc2d1000 [ 155757.976648] RBP: 0000000000000001 R08: 000000000000000b R09: ffff888ad9cedba0 [ 155757.991094] R10: ffffea0039c07900 R11: 0000000000000010 R12: ffff888b23a7b000 [ 155758.005540] R13: 0000000000000000 R14: ffff888bbc2d1000 R15: 000007ffffc71354 [ 155758.019991] FS: 00007f6234c68640(0000) GS:ffff88903f9c0000(0000) 
knlGS:0000000000000000 [ 155758.036356] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 155758.048023] CR2: 00000000000000c0 CR3: 0000000a83eb8004 CR4: 00000000007706e0 [ 155758.062473] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 155758.076924] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 155758.091376] PKRU: 55555554 [ 155758.096957] Call Trace: [ 155758.102016] [ 155758.106502] ? __die+0x78/0xc0 [ 155758.112793] ? page_fault_oops+0x286/0x380 [ 155758.121175] ? exc_page_fault+0x5d/0x110 [ 155758.129209] ? asm_exc_page_fault+0x22/0x30 [ 155758.137763] ? mem_cgroup_get_nr_swap_pages+0x3d/0xb0 [ 155758.148060] workingset_test_recent+0xda/0x1b0 [ 155758.157133] workingset_refault+0xca/0x1e0 [ 155758.165508] filemap_add_folio+0x4d/0x70 [ 155758.173538] page_cache_ra_unbounded+0xed/0x190 [ 155758.182919] page_cache_sync_ra+0xd6/0x1e0 [ 155758.191738] filemap_read+0x68d/0xdf0 [ 155758.199495] ? mlx5e_napi_poll+0x123/0x940 [ 155758.207981] ? __napi_schedule+0x55/0x90 [ 155758.216095] __x64_sys_pread64+0x1d6/0x2c0 [ 155758.224601] do_syscall_64+0x3d/0x80 [ 155758.232058] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 155758.242473] RIP: 0033:0x7f62c29153b5 [ 155758.249938] Code: e8 48 89 75 f0 89 7d f8 48 89 4d e0 e8 b4 e6 f7 ff 41 89 c0 4c 8b 55 e0 48 8b 55 e8 48 8b 75 f0 8b 7d f8 b8 11 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 33 44 89 c7 48 89 45 f8 e8 e7 e6 f7 ff 48 8b [ 155758.288005] RSP: 002b:00007f6234c5ffd0 EFLAGS: 00000293 ORIG_RAX: 0000000000000011 [ 155758.303474] RAX: ffffffffffffffda RBX: 00007f628c4e70c0 RCX: 00007f62c29153b5 [ 155758.318075] RDX: 000000000003c041 RSI: 00007f61d2986000 RDI: 0000000000000076 [ 155758.332678] RBP: 00007f6234c5fff0 R08: 0000000000000000 R09: 0000000064d5230c [ 155758.347452] R10: 000000000027d450 R11: 0000000000000293 R12: 000000000003c041 [ 155758.362044] R13: 00007f61d2986000 R14: 00007f629e11b060 R15: 000000000027d450 [ 155758.376661] This patch fixes the issue by moving the memcg's 
id publication from the alloc stage to online stage, ensuring that any memcg acquired via id must be connected to the memcg tree. Link: https://lkml.kernel.org/r/20230823225430.166925-1-nphamcs@gmail.com Fixes: f78dfc7b77d5 ("workingset: fix confusion around eviction vs refault container") Signed-off-by: Johannes Weiner Co-developed-by: Nhat Pham Signed-off-by: Nhat Pham Acked-by: Shakeel Butt Cc: Yosry Ahmed Cc: Michal Hocko Cc: Roman Gushchin Cc: Muchun Song Cc: Signed-off-by: Andrew Morton --- mm/memcontrol.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b29b850cf399..a4d3282493b6 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5326,7 +5326,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void) INIT_LIST_HEAD(&memcg->deferred_split_queue.split_queue); memcg->deferred_split_queue.split_queue_len = 0; #endif - idr_replace(&mem_cgroup_idr, memcg, memcg->id.id); lru_gen_init_memcg(memcg); return memcg; fail: @@ -5398,14 +5397,27 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css) if (alloc_shrinker_info(memcg)) goto offline_kmem; - /* Online state pins memcg ID, memcg ID pins CSS */ - refcount_set(&memcg->id.ref, 1); - css_get(css); - if (unlikely(mem_cgroup_is_root(memcg))) queue_delayed_work(system_unbound_wq, &stats_flush_dwork, FLUSH_TIME); lru_gen_online_memcg(memcg); + + /* Online state pins memcg ID, memcg ID pins CSS */ + refcount_set(&memcg->id.ref, 1); + css_get(css); + + /* + * Ensure mem_cgroup_from_id() works once we're fully online. + * + * We could do this earlier and require callers to filter with + * css_tryget_online(). But right now there are no users that + * need earlier access, and the workingset code relies on the + * cgroup tree linkage (mem_cgroup_get_nr_swap_pages()). So + * publish it here at the end of onlining. This matches the + * regular ID destruction during offlining. 
+ */ + idr_replace(&mem_cgroup_idr, memcg, memcg->id.id); + return 0; offline_kmem: memcg_offline_kmem(memcg); From 7f33105cdd59a99d068d3d147723a865d10e2260 Mon Sep 17 00:00:00 2001 From: Xie XiuQi Date: Thu, 31 Aug 2023 11:42:05 +0800 Subject: [PATCH 2/7] tools/mm: fix undefined reference to pthread_once Commit 97d5f2e9ee12 ("tools api fs: More thread safety for global filesystem variables") introduces pthread_once, so the libpthread should be added at link time, or we'll meet the following compile error when 'make -C tools/mm': gcc -Wall -Wextra -I../lib/ -o page-types page-types.c ../lib/api/libapi.a ~/linux/tools/lib/api/fs/fs.c:146: undefined reference to `pthread_once' ~/linux/tools/lib/api/fs/fs.c:147: undefined reference to `pthread_once' ~/linux/tools/lib/api/fs/fs.c:148: undefined reference to `pthread_once' ~/linux/tools/lib/api/fs/fs.c:149: undefined reference to `pthread_once' ~/linux/tools/lib/api/fs/fs.c:150: undefined reference to `pthread_once' /usr/bin/ld: ../lib/api/libapi.a(libapi-in.o):~/linux/tools/lib/api/fs/fs.c:151: more undefined references to `pthread_once' follow collect2: error: ld returned 1 exit status make: *** [Makefile:22: page-types] Error 1 Link: https://lkml.kernel.org/r/20230831034205.2376653-1-xiexiuqi@huaweicloud.com Fixes: 97d5f2e9ee12 ("tools api fs: More thread safety for global filesystem variables") Signed-off-by: Xie XiuQi Acked-by: Ian Rogers Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- tools/mm/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/mm/Makefile b/tools/mm/Makefile index 6c1da51f4177..1c5606cc3334 100644 --- a/tools/mm/Makefile +++ b/tools/mm/Makefile @@ -8,8 +8,8 @@ TARGETS=page-types slabinfo page_owner_sort LIB_DIR = ../lib/api LIBS = $(LIB_DIR)/libapi.a -CFLAGS += -Wall -Wextra -I../lib/ -LDFLAGS += $(LIBS) +CFLAGS += -Wall -Wextra -I../lib/ -pthread +LDFLAGS += $(LIBS) -pthread all: $(TARGETS) From 0818e739b5c061b0251c30152380600fb9b84c0c Mon Sep 17 00:00:00 
2001 From: "Joel Fernandes (Google)" Date: Mon, 4 Sep 2023 18:08:04 +0000 Subject: [PATCH 3/7] mm/vmalloc: add a safer version of find_vm_area() for debug It is unsafe to dump vmalloc area information when trying to do so from some contexts. Add a safer trylock version of the same function to do a best-effort VMA finding and use it from vmalloc_dump_obj(). [applied test robot feedback on unused function fix.] [applied Uladzislau feedback on locking.] Link: https://lkml.kernel.org/r/20230904180806.1002832-1-joel@joelfernandes.org Fixes: 98f180837a89 ("mm: Make mem_dump_obj() handle vmalloc() memory") Signed-off-by: Joel Fernandes (Google) Reviewed-by: Uladzislau Rezki (Sony) Reported-by: Zhen Lei Cc: Paul E. McKenney Cc: Zqiang Cc: Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- mm/vmalloc.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 228a4a5312f2..ef8599d394fd 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -4278,14 +4278,32 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) #ifdef CONFIG_PRINTK bool vmalloc_dump_obj(void *object) { - struct vm_struct *vm; void *objp = (void *)PAGE_ALIGN((unsigned long)object); + const void *caller; + struct vm_struct *vm; + struct vmap_area *va; + unsigned long addr; + unsigned int nr_pages; - vm = find_vm_area(objp); - if (!vm) + if (!spin_trylock(&vmap_area_lock)) return false; + va = __find_vmap_area((unsigned long)objp, &vmap_area_root); + if (!va) { + spin_unlock(&vmap_area_lock); + return false; + } + + vm = va->vm; + if (!vm) { + spin_unlock(&vmap_area_lock); + return false; + } + addr = (unsigned long)vm->addr; + caller = vm->caller; + nr_pages = vm->nr_pages; + spin_unlock(&vmap_area_lock); pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n", - vm->nr_pages, (unsigned long)vm->addr, vm->caller); + nr_pages, addr, caller); return true; } #endif From c83ad36a18c02c0f51280b50272327807916987f Mon Sep 
17 00:00:00 2001 From: Zqiang Date: Mon, 4 Sep 2023 18:08:05 +0000 Subject: [PATCH 4/7] rcu: dump vmalloc memory info safely Currently, for double invoke call_rcu(), will dump rcu_head objects memory info, if the objects is not allocated from the slab allocator, the vmalloc_dump_obj() will be invoke and the vmap_area_lock spinlock need to be held, since the call_rcu() can be invoked in interrupt context, therefore, there is a possibility of spinlock deadlock scenarios. And in Preempt-RT kernel, the rcutorture test also trigger the following lockdep warning: BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48 in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 1, name: swapper/0 preempt_count: 1, expected: 0 RCU nest depth: 1, expected: 1 3 locks held by swapper/0/1: #0: ffffffffb534ee80 (fullstop_mutex){+.+.}-{4:4}, at: torture_init_begin+0x24/0xa0 #1: ffffffffb5307940 (rcu_read_lock){....}-{1:3}, at: rcu_torture_init+0x1ec7/0x2370 #2: ffffffffb536af40 (vmap_area_lock){+.+.}-{3:3}, at: find_vmap_area+0x1f/0x70 irq event stamp: 565512 hardirqs last enabled at (565511): [] __call_rcu_common+0x218/0x940 hardirqs last disabled at (565512): [] rcu_torture_init+0x20b2/0x2370 softirqs last enabled at (399112): [] __local_bh_enable_ip+0x126/0x170 softirqs last disabled at (399106): [] inet_register_protosw+0x9/0x1d0 Preemption disabled at: [] rcu_torture_init+0x1f13/0x2370 CPU: 0 PID: 1 Comm: swapper/0 Tainted: G W 6.5.0-rc4-rt2-yocto-preempt-rt+ #15 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-0-gea1b7a073390-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x68/0xb0 dump_stack+0x14/0x20 __might_resched+0x1aa/0x280 ? __pfx_rcu_torture_err_cb+0x10/0x10 rt_spin_lock+0x53/0x130 ? find_vmap_area+0x1f/0x70 find_vmap_area+0x1f/0x70 vmalloc_dump_obj+0x20/0x60 mem_dump_obj+0x22/0x90 __call_rcu_common+0x5bf/0x940 ? debug_smp_processor_id+0x1b/0x30 call_rcu_hurry+0x14/0x20 rcu_torture_init+0x1f82/0x2370 ? 
__pfx_rcu_torture_leak_cb+0x10/0x10 ? __pfx_rcu_torture_leak_cb+0x10/0x10 ? __pfx_rcu_torture_init+0x10/0x10 do_one_initcall+0x6c/0x300 ? debug_smp_processor_id+0x1b/0x30 kernel_init_freeable+0x2b9/0x540 ? __pfx_kernel_init+0x10/0x10 kernel_init+0x1f/0x150 ret_from_fork+0x40/0x50 ? __pfx_kernel_init+0x10/0x10 ret_from_fork_asm+0x1b/0x30 The previous patch fixes this by using the deadlock-safe best-effort version of find_vm_area. However, in case of failure print the fact that the pointer was a vmalloc pointer so that we print at least something. Link: https://lkml.kernel.org/r/20230904180806.1002832-2-joel@joelfernandes.org Fixes: 98f180837a89 ("mm: Make mem_dump_obj() handle vmalloc() memory") Signed-off-by: Zqiang Signed-off-by: Joel Fernandes (Google) Reported-by: Zhen Lei Reviewed-by: Matthew Wilcox (Oracle) Cc: Paul E. McKenney Cc: Uladzislau Rezki (Sony) Cc: Signed-off-by: Andrew Morton --- mm/util.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/util.c b/mm/util.c index f08b655da917..8cbbfd3a3d59 100644 --- a/mm/util.c +++ b/mm/util.c @@ -1068,7 +1068,9 @@ void mem_dump_obj(void *object) if (vmalloc_dump_obj(object)) return; - if (virt_addr_valid(object)) + if (is_vmalloc_addr(object)) + type = "vmalloc memory"; + else if (virt_addr_valid(object)) type = "non-slab/vmalloc memory"; else if (object == NULL) type = "NULL pointer"; From 2562d67b1bdf91c7395b0225d60fdeb26b4bc5a0 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 2 Sep 2023 15:59:31 -0700 Subject: [PATCH 5/7] revert "memfd: improve userspace warnings for missing exec-related flags". This warning is telling userspace developers to pass MFD_EXEC and MFD_NOEXEC_SEAL to memfd_create(). Commit 434ed3350f57 ("memfd: improve userspace warnings for missing exec-related flags") made the warning more frequent and visible in the hope that this would accelerate the fixing of errant userspace. But the overall effect is to generate far too much dmesg noise. 
Fixes: 434ed3350f57 ("memfd: improve userspace warnings for missing exec-related flags") Reported-by: Damian Tometzki Closes: https://lkml.kernel.org/r/ZPFzCSIgZ4QuHsSC@fedora.fritz.box Cc: Aleksa Sarai Cc: Christian Brauner Cc: Daniel Verkamp Cc: Jeff Xu Cc: Kees Cook Cc: Shuah Khan Cc: Signed-off-by: Andrew Morton --- mm/memfd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memfd.c b/mm/memfd.c index 1cad1904fc26..2dba2cb6f0d0 100644 --- a/mm/memfd.c +++ b/mm/memfd.c @@ -316,7 +316,7 @@ SYSCALL_DEFINE2(memfd_create, return -EINVAL; if (!(flags & (MFD_EXEC | MFD_NOEXEC_SEAL))) { - pr_info_ratelimited( + pr_warn_once( "%s[%d]: memfd_create() called without MFD_EXEC or MFD_NOEXEC_SEAL set\n", current->comm, task_pid_nr(current)); } From d256d1cd8da1cbc4615de69df71c87ce623fec2f Mon Sep 17 00:00:00 2001 From: Tong Tiangen Date: Mon, 28 Aug 2023 10:25:27 +0800 Subject: [PATCH 6/7] mm: memory-failure: use rcu lock instead of tasklist_lock when collect_procs() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We found a softlock issue in our test, analyzed the logs, and found that the relevant CPU call trace is as follows: CPU0: _do_fork -> copy_process() -> write_lock_irq(&tasklist_lock) //Disable irq,waiting for //tasklist_lock CPU1: wp_page_copy() ->pte_offset_map_lock() -> spin_lock(&page->ptl); //Hold page->ptl -> ptep_clear_flush() -> flush_tlb_others() ... -> smp_call_function_many() -> arch_send_call_function_ipi_mask() -> csd_lock_wait() //Waiting for other CPUs respond //IPI CPU2: collect_procs_anon() -> read_lock(&tasklist_lock) //Hold tasklist_lock ->for_each_process(tsk) -> page_mapped_in_vma() -> page_vma_mapped_walk() -> map_pte() ->spin_lock(&page->ptl) //Waiting for page->ptl We can see that CPU1 is waiting for CPU0 to respond to the IPI, CPU0 is waiting for CPU2 to unlock tasklist_lock, and CPU2 is waiting for CPU1 to unlock page->ptl. As a result, a softlockup is triggered.
For collect_procs_anon(), what we're doing is task list iteration. During the iteration, with the help of call_rcu(), the task_struct object is freed only after one or more grace periods elapse. The logic is as follows: release_task() -> __exit_signal() -> __unhash_process() -> list_del_rcu() -> put_task_struct_rcu_user() -> call_rcu(&task->rcu, delayed_put_task_struct) delayed_put_task_struct() -> put_task_struct() -> if (refcount_sub_and_test()) __put_task_struct() -> free_task() Therefore, under the protection of the rcu lock, we can safely use get_task_struct() to ensure a safe reference to task_struct during the iteration. By removing the use of tasklist_lock in task list iteration, we can break the softlock chain above. The same logic can also be applied to: - collect_procs_file() - collect_procs_fsdax() - collect_procs_ksm() Link: https://lkml.kernel.org/r/20230828022527.241693-1-tongtiangen@huawei.com Signed-off-by: Tong Tiangen Acked-by: Naoya Horiguchi Cc: Kefeng Wang Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Paul E. 
McKenney Signed-off-by: Andrew Morton --- mm/filemap.c | 3 --- mm/ksm.c | 4 ++-- mm/memory-failure.c | 16 ++++++++-------- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index bf6219d9aaac..582f5317ff71 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -121,9 +121,6 @@ * bdi.wb->list_lock (zap_pte_range->set_page_dirty) * ->inode->i_lock (zap_pte_range->set_page_dirty) * ->private_lock (zap_pte_range->block_dirty_folio) - * - * ->i_mmap_rwsem - * ->tasklist_lock (memory_failure, collect_procs_ao) */ static void page_cache_delete(struct address_space *mapping, diff --git a/mm/ksm.c b/mm/ksm.c index 8d6aee05421d..981af9c72e7a 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -2925,7 +2925,7 @@ void collect_procs_ksm(struct page *page, struct list_head *to_kill, struct anon_vma *av = rmap_item->anon_vma; anon_vma_lock_read(av); - read_lock(&tasklist_lock); + rcu_read_lock(); for_each_process(tsk) { struct anon_vma_chain *vmac; unsigned long addr; @@ -2944,7 +2944,7 @@ void collect_procs_ksm(struct page *page, struct list_head *to_kill, } } } - read_unlock(&tasklist_lock); + rcu_read_unlock(); anon_vma_unlock_read(av); } } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 881c35ef1daa..d06b0fba09fb 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -547,8 +547,8 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail, * on behalf of the thread group. Return task_struct of the (first found) * dedicated thread if found, and return NULL otherwise. * - * We already hold read_lock(&tasklist_lock) in the caller, so we don't - * have to call rcu_read_lock/unlock() in this function. + * We already hold rcu lock in the caller, so we don't have to call + * rcu_read_lock/unlock() in this function. 
*/ static struct task_struct *find_early_kill_thread(struct task_struct *tsk) { @@ -609,7 +609,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, return; pgoff = page_to_pgoff(page); - read_lock(&tasklist_lock); + rcu_read_lock(); for_each_process(tsk) { struct anon_vma_chain *vmac; struct task_struct *t = task_early_kill(tsk, force_early); @@ -626,7 +626,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, add_to_kill_anon_file(t, page, vma, to_kill); } } - read_unlock(&tasklist_lock); + rcu_read_unlock(); anon_vma_unlock_read(av); } @@ -642,7 +642,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, pgoff_t pgoff; i_mmap_lock_read(mapping); - read_lock(&tasklist_lock); + rcu_read_lock(); pgoff = page_to_pgoff(page); for_each_process(tsk) { struct task_struct *t = task_early_kill(tsk, force_early); @@ -662,7 +662,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, add_to_kill_anon_file(t, page, vma, to_kill); } } - read_unlock(&tasklist_lock); + rcu_read_unlock(); i_mmap_unlock_read(mapping); } @@ -685,7 +685,7 @@ static void collect_procs_fsdax(struct page *page, struct task_struct *tsk; i_mmap_lock_read(mapping); - read_lock(&tasklist_lock); + rcu_read_lock(); for_each_process(tsk) { struct task_struct *t = task_early_kill(tsk, true); @@ -696,7 +696,7 @@ static void collect_procs_fsdax(struct page *page, add_to_kill_fsdax(t, page, vma, to_kill, pgoff); } } - read_unlock(&tasklist_lock); + rcu_read_unlock(); i_mmap_unlock_read(mapping); } #endif /* CONFIG_FS_DAX */ From f4b4f3ec1a310c3de9797271a9c06b7499470d69 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (IBM)" Date: Mon, 4 Sep 2023 20:37:59 +0300 Subject: [PATCH 7/7] sparc64: add missing initialization of folio in tlb_batch_add() Commit 1a10a44dfc1d ("sparc64: implement the new page table range API") missed initialization of folio variable in tlb_batch_add() which causes boot tests to crash. 
Add missing initialization. Link: https://lkml.kernel.org/r/20230904174350.GF3223@kernel.org Fixes: 1a10a44dfc1d ("sparc64: implement the new page table range API") Signed-off-by: Mike Rapoport (IBM) Reported-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Andrew Morton --- arch/sparc/mm/tlb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 0d41c94ec3ac..b44d79d778c7 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -128,6 +128,7 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, goto no_cache_flush; /* A real file page? */ + folio = page_folio(page); mapping = folio_flush_mapping(folio); if (!mapping) goto no_cache_flush;