linux/mm/swap.h
Barry Song 9d57090e73 mm: fix swap_read_folio_zeromap() for large folios with partial zeromap
Patch series "mm: enable large folios swap-in support", v9.

Currently, we support mTHP swapout but not swapin.  This means that once
mTHP is swapped out, it will come back as small folios when swapped in. 
This is particularly detrimental for devices like Android, where more than
half of the memory is in swap.

The lack of mTHP swapin functionality makes mTHP a showstopper in
scenarios that heavily rely on swap.  This patchset introduces mTHP
swap-in support.  It starts with synchronous devices similar to zRAM,
aiming to benefit as many users as possible with minimal changes.


This patch (of 3):

There could be a corner case where the first entry is non-zeromap, but a
subsequent entry is zeromap.  In this case, we should not let
swap_read_folio_zeromap() return false since we will still read corrupted
data.

Additionally, the iteration of test_bit() is unnecessary and can be
replaced with bitmap operations, which are more efficient.

We can adopt the style of swap_pte_batch() and folio_pte_batch() to
introduce swap_zeromap_batch() which seems to provide the greatest
flexibility for the caller.  This approach allows the caller to either
check if the zeromap status of all entries is consistent or determine the
number of contiguous entries with the same status.

Since swap_read_folio() can't handle reading a large folio that's
partially zeromap and partially non-zeromap, we've moved the code to
mm/swap.h so that others, like those working on swap-in, can access it.

Link: https://lkml.kernel.org/r/20240908232119.2157-1-21cnbao@gmail.com
Link: https://lkml.kernel.org/r/20240908232119.2157-2-21cnbao@gmail.com
Fixes: 0ca0c24e32 ("mm: store zero pages to be swapped out in a bitmap")
Signed-off-by: Barry Song <v-songbaohua@oppo.com>
Reviewed-by: Yosry Ahmed <yosryahmed@google.com>
Reviewed-by: Usama Arif <usamaarif642@gmail.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Chris Li <chrisl@kernel.org>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Chuanhua Han <hanchuanhua@oppo.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Gao Xiang <xiang@kernel.org>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kairui Song <kasong@tencent.com>
Cc: Kairui Song <ryncsn@gmail.com>
Cc: Kalesh Singh <kaleshsingh@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Kanchana P Sridhar <kanchana.p.sridhar@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-09-17 01:07:01 -07:00

210 lines
5.4 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _MM_SWAP_H
#define _MM_SWAP_H
struct mempolicy;
#ifdef CONFIG_SWAP
#include <linux/swapops.h> /* for swp_offset */
#include <linux/blk_types.h> /* for bio_end_io_t */
/*
 * linux/mm/page_io.c
 *
 * Read-side swap I/O entry points.  Only the prototypes are visible in
 * this header; the section marker names mm/page_io.c as the place where
 * they are defined.
 */
int sio_pool_init(void);
/* Opaque in this header: it is only ever passed around by pointer here. */
struct swap_iocb;
void swap_read_folio(struct folio *folio, struct swap_iocb **plug);
/* Out-of-line half of swap_read_unplug(), which calls it for a non-NULL plug. */
void __swap_read_unplug(struct swap_iocb *plug);
/*
 * Finish a read plug, if the caller has one.
 *
 * A NULL @plug is treated as "nothing to do" and the function returns
 * immediately; the branch hint marks a non-NULL plug as the unexpected
 * case.  Any non-NULL plug is handed to __swap_read_unplug().
 */
static inline void swap_read_unplug(struct swap_iocb *plug)
{
	if (!unlikely(plug))
		return;

	__swap_read_unplug(plug);
}
/*
 * Write-side swap I/O entry points.  Declarations only; the bodies are
 * outside this header (presumably mm/page_io.c, following the section
 * marker above -- confirm).  swap_write_unplug() and swap_writepage()
 * have !CONFIG_SWAP stubs further down; __swap_writepage() does not.
 */
void swap_write_unplug(struct swap_iocb *sio);
int swap_writepage(struct page *page, struct writeback_control *wbc);
void __swap_writepage(struct folio *folio, struct writeback_control *wbc);
/* linux/mm/swap_state.c */
/*
 * One swap address space for each 64M swap space.
 *
 * SHIFT is log2 of the number of swap slots covered by one address space
 * (1 << 14 = 16384 slots; the 64M figure presumably assumes 4 KiB pages --
 * confirm for other page sizes).  MASK extracts a slot's index within its
 * address space.
 */
#define SWAP_ADDRESS_SPACE_SHIFT 14
#define SWAP_ADDRESS_SPACE_PAGES (1 << SWAP_ADDRESS_SPACE_SHIFT)
#define SWAP_ADDRESS_SPACE_MASK (SWAP_ADDRESS_SPACE_PAGES - 1)
/* Indexed first by swap type, then by (offset >> SWAP_ADDRESS_SPACE_SHIFT). */
extern struct address_space *swapper_spaces[];
/*
 * Yield a pointer to the address space covering @entry.
 *
 * NOTE: this is a macro and expands @entry twice (once for swp_type(),
 * once for swp_offset()), so the argument must not have side effects.
 * The !CONFIG_SWAP counterpart below is a static inline function.
 */
#define swap_address_space(entry) \
(&swapper_spaces[swp_type(entry)][swp_offset(entry) \
>> SWAP_ADDRESS_SPACE_SHIFT])
/*
 * Translate a swap entry into a byte position on its swap device:
 * the entry's offset, taken as a loff_t, shifted left by PAGE_SHIFT.
 */
static inline loff_t swap_dev_pos(swp_entry_t entry)
{
	/* Hold the offset in a loff_t first so the shift happens in that type. */
	loff_t pos = swp_offset(entry);

	pos <<= PAGE_SHIFT;
	return pos;
}
/*
 * Compute the index of a swap entry inside its swap address space:
 * the low SWAP_ADDRESS_SPACE_SHIFT bits of the entry's offset.
 */
static inline pgoff_t swap_cache_index(swp_entry_t entry)
{
	pgoff_t offset = swp_offset(entry);

	/*
	 * Build-time guard: every bit of the address-space mask must already
	 * be part of the swap offset mask.
	 */
	BUILD_BUG_ON((SWP_OFFSET_MASK | SWAP_ADDRESS_SPACE_MASK) !=
		     SWP_OFFSET_MASK);

	return offset & SWAP_ADDRESS_SPACE_MASK;
}
/*
 * Swap cache interface (CONFIG_SWAP build).
 *
 * Declarations only: none of these are defined in this header.  The
 * section marker above points at mm/swap_state.c; whether every function
 * listed here actually lives in that file should be confirmed.
 *
 * All of them have a stub in the !CONFIG_SWAP branch of this header except
 * read_swap_cache_async() and __read_swap_cache_async(), which are
 * declared here only.
 */
void show_swap_cache_info(void);
bool add_to_swap(struct folio *folio);
void *get_shadow_from_swap_cache(swp_entry_t entry);
int add_to_swap_cache(struct folio *folio, swp_entry_t entry,
gfp_t gfp, void **shadowp);
void __delete_from_swap_cache(struct folio *folio,
swp_entry_t entry, void *shadow);
void delete_from_swap_cache(struct folio *folio);
void clear_shadow_from_swap_cache(int type, unsigned long begin,
unsigned long end);
void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr);
struct folio *swap_cache_get_folio(swp_entry_t entry,
struct vm_area_struct *vma, unsigned long addr);
struct folio *filemap_get_incore_folio(struct address_space *mapping,
pgoff_t index);
/* Swap-in helpers; both take a swap entry and return a folio pointer. */
struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
struct vm_area_struct *vma, unsigned long addr,
struct swap_iocb **plug);
struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_flags,
struct mempolicy *mpol, pgoff_t ilx, bool *new_page_allocated,
bool skip_if_exists);
/* Readahead entry points; both return a folio pointer. */
struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t flag,
struct mempolicy *mpol, pgoff_t ilx);
struct folio *swapin_readahead(swp_entry_t entry, gfp_t flag,
struct vm_fault *vmf);
static inline unsigned int folio_swap_flags(struct folio *folio)
{
return swp_swap_info(folio->swap)->flags;
}
/*
 * Count how many consecutive swap entries, beginning at @entry, have the
 * same zeromap bit as @entry itself.
 *
 * @entry:      first swap entry of the range to inspect
 * @max_nr:     number of entries in the range; the scan is bounded at
 *              swp_offset(entry) + max_nr
 * @is_zeromap: optional; when non-NULL it receives the zeromap bit of
 *              @entry, regardless of @max_nr
 *
 * When @max_nr is 1 or less it is returned unchanged and no scan is done.
 * Otherwise the return value is the distance from @entry to the first
 * position, within the bounded range, whose zeromap bit differs.
 */
static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr,
		bool *is_zeromap)
{
	struct swap_info_struct *si = swp_swap_info(entry);
	const unsigned long first = swp_offset(entry);
	const unsigned long limit = first + max_nr;
	const bool zero_filled = test_bit(first, si->zeromap);
	unsigned long boundary;

	if (is_zeromap)
		*is_zeromap = zero_filled;

	if (max_nr <= 1)
		return max_nr;

	/* Look for the first bit of the opposite polarity. */
	boundary = zero_filled ?
		find_next_zero_bit(si->zeromap, limit, first) :
		find_next_bit(si->zeromap, limit, first);

	return boundary - first;
}
#else /* CONFIG_SWAP */
/*
 * !CONFIG_SWAP stubs.
 *
 * With swap compiled out, the helpers below keep callers building: each
 * one does nothing, or returns a fixed "nothing there" value.
 */
struct swap_iocb;
/* No-op: leaves @folio and @plug untouched. */
static inline void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
{
}
/* No-op. */
static inline void swap_write_unplug(struct swap_iocb *sio)
{
}
/* Always NULL: a function here, whereas the CONFIG_SWAP version is a macro. */
static inline struct address_space *swap_address_space(swp_entry_t entry)
{
return NULL;
}
/* Always index 0. */
static inline pgoff_t swap_cache_index(swp_entry_t entry)
{
return 0;
}
/* No-op. */
static inline void show_swap_cache_info(void)
{
}
/* !CONFIG_SWAP stub: never produces a folio. */
static inline struct folio *swap_cluster_readahead(swp_entry_t entry,
gfp_t gfp_mask, struct mempolicy *mpol, pgoff_t ilx)
{
return NULL;
}
/* !CONFIG_SWAP stub: never produces a folio. */
static inline struct folio *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
struct vm_fault *vmf)
{
return NULL;
}
/* !CONFIG_SWAP stub: does nothing with @p and returns 0. */
static inline int swap_writepage(struct page *p, struct writeback_control *wbc)
{
return 0;
}
/* !CONFIG_SWAP stub: no-op. */
static inline void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr)
{
}
/* !CONFIG_SWAP stub: the lookup always misses and returns NULL. */
static inline struct folio *swap_cache_get_folio(swp_entry_t entry,
struct vm_area_struct *vma, unsigned long addr)
{
return NULL;
}
/*
 * !CONFIG_SWAP version: unlike the other stubs this one does real work,
 * forwarding straight to filemap_get_folio() on the same mapping and index.
 */
static inline
struct folio *filemap_get_incore_folio(struct address_space *mapping,
pgoff_t index)
{
return filemap_get_folio(mapping, index);
}
/* !CONFIG_SWAP stub: always returns false. */
static inline bool add_to_swap(struct folio *folio)
{
return false;
}
/* !CONFIG_SWAP stub: always returns NULL. */
static inline void *get_shadow_from_swap_cache(swp_entry_t entry)
{
return NULL;
}
/* !CONFIG_SWAP stub: always returns -1 and never writes *shadowp. */
static inline int add_to_swap_cache(struct folio *folio, swp_entry_t entry,
gfp_t gfp_mask, void **shadowp)
{
return -1;
}
/* !CONFIG_SWAP stub: no-op. */
static inline void __delete_from_swap_cache(struct folio *folio,
swp_entry_t entry, void *shadow)
{
}
/* !CONFIG_SWAP stub: no-op. */
static inline void delete_from_swap_cache(struct folio *folio)
{
}
/* !CONFIG_SWAP stub: no-op. */
static inline void clear_shadow_from_swap_cache(int type, unsigned long begin,
unsigned long end)
{
}
/* !CONFIG_SWAP stub: reports no flags (0) without looking at @folio. */
static inline unsigned int folio_swap_flags(struct folio *folio)
{
return 0;
}
/*
 * !CONFIG_SWAP stub for swap_zeromap_batch().
 *
 * @entry:      ignored
 * @max_nr:     ignored
 * @is_zeromap: optional; when non-NULL it is set to false
 *
 * Always returns 0, i.e. no entries are batched.  The out-parameter is
 * written so that a caller inspecting it never reads an indeterminate
 * value; the CONFIG_SWAP implementation likewise always stores into a
 * non-NULL @is_zeromap.  The parameter name matches that implementation.
 */
static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr,
		bool *is_zeromap)
{
	if (is_zeromap)
		*is_zeromap = false;

	return 0;
}
#endif /* CONFIG_SWAP */
#endif /* _MM_SWAP_H */