linux/drivers/block/zram/zram_drv.c
Peter Kalauskas c8bd134a4b drivers/block/zram/zram_drv.c: fix bug storing backing_dev
The call to strlcpy in backing_dev_store is incorrect. It should take
the size of the destination buffer instead of the size of the source
buffer.  Additionally, ignore the newline character (\n) when reading
the new file_name buffer. This makes it possible to set the backing_dev
as follows:

	echo /dev/sdX > /sys/block/zram0/backing_dev

The reason it worked before was the fact that strlcpy() copies 'len - 1'
bytes, which is strlen(buf) - 1 in our case, so it accidentally didn't
copy the trailing new line symbol.  Which also means that "echo -n
/dev/sdX" most likely was broken.

Signed-off-by: Peter Kalauskas <peskal@google.com>
Link: http://lkml.kernel.org/r/20180813061623.GC64836@rodete-desktop-imager.corp.google.com
Acked-by: Minchan Kim <minchan@kernel.org>
Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: <stable@vger.kernel.org>    [4.14+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-08-22 10:52:45 -07:00

1928 lines
45 KiB
C

/*
* Compressed RAM block device
*
* Copyright (C) 2008, 2009, 2010 Nitin Gupta
* 2012, 2013 Minchan Kim
*
* This code is released using a dual license strategy: BSD/GPL
* You can choose the licence that better fits your requirements.
*
* Released under the terms of 3-clause BSD License
* Released under the terms of GNU General Public License Version 2.0
*
*/
#define KMSG_COMPONENT "zram"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/sysfs.h>
#include <linux/debugfs.h>
#include <linux/cpuhotplug.h>
#include "zram_drv.h"
static DEFINE_IDR(zram_index_idr);
/* idr index must be protected */
static DEFINE_MUTEX(zram_index_mutex);
static int zram_major;
static const char *default_compressor = "lzo";
/* Module params (documentation at end) */
static unsigned int num_devices = 1;
/*
* Pages that compress to sizes equals or greater than this are stored
* uncompressed in memory.
*/
static size_t huge_class_size;
static void zram_free_page(struct zram *zram, size_t index);
static void zram_slot_lock(struct zram *zram, u32 index)
{
bit_spin_lock(ZRAM_LOCK, &zram->table[index].value);
}
static void zram_slot_unlock(struct zram *zram, u32 index)
{
bit_spin_unlock(ZRAM_LOCK, &zram->table[index].value);
}
static inline bool init_done(struct zram *zram)
{
return zram->disksize;
}
static inline bool zram_allocated(struct zram *zram, u32 index)
{
return (zram->table[index].value >> (ZRAM_FLAG_SHIFT + 1)) ||
zram->table[index].handle;
}
static inline struct zram *dev_to_zram(struct device *dev)
{
return (struct zram *)dev_to_disk(dev)->private_data;
}
static unsigned long zram_get_handle(struct zram *zram, u32 index)
{
return zram->table[index].handle;
}
static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
{
zram->table[index].handle = handle;
}
/* flag operations require table entry bit_spin_lock() being held */
static bool zram_test_flag(struct zram *zram, u32 index,
enum zram_pageflags flag)
{
return zram->table[index].value & BIT(flag);
}
static void zram_set_flag(struct zram *zram, u32 index,
enum zram_pageflags flag)
{
zram->table[index].value |= BIT(flag);
}
static void zram_clear_flag(struct zram *zram, u32 index,
enum zram_pageflags flag)
{
zram->table[index].value &= ~BIT(flag);
}
static inline void zram_set_element(struct zram *zram, u32 index,
unsigned long element)
{
zram->table[index].element = element;
}
static unsigned long zram_get_element(struct zram *zram, u32 index)
{
return zram->table[index].element;
}
static size_t zram_get_obj_size(struct zram *zram, u32 index)
{
return zram->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
}
static void zram_set_obj_size(struct zram *zram,
u32 index, size_t size)
{
unsigned long flags = zram->table[index].value >> ZRAM_FLAG_SHIFT;
zram->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
}
#if PAGE_SIZE != 4096
static inline bool is_partial_io(struct bio_vec *bvec)
{
return bvec->bv_len != PAGE_SIZE;
}
#else
static inline bool is_partial_io(struct bio_vec *bvec)
{
return false;
}
#endif
/*
* Check if request is within bounds and aligned on zram logical blocks.
*/
static inline bool valid_io_request(struct zram *zram,
sector_t start, unsigned int size)
{
u64 end, bound;
/* unaligned request */
if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
return false;
if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
return false;
end = start + (size >> SECTOR_SHIFT);
bound = zram->disksize >> SECTOR_SHIFT;
/* out of range range */
if (unlikely(start >= bound || end > bound || start > end))
return false;
/* I/O request is valid */
return true;
}
static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
{
*index += (*offset + bvec->bv_len) / PAGE_SIZE;
*offset = (*offset + bvec->bv_len) % PAGE_SIZE;
}
static inline void update_used_max(struct zram *zram,
const unsigned long pages)
{
unsigned long old_max, cur_max;
old_max = atomic_long_read(&zram->stats.max_used_pages);
do {
cur_max = old_max;
if (pages > cur_max)
old_max = atomic_long_cmpxchg(
&zram->stats.max_used_pages, cur_max, pages);
} while (old_max != cur_max);
}
static inline void zram_fill_page(void *ptr, unsigned long len,
unsigned long value)
{
WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
memset_l(ptr, value, len / sizeof(unsigned long));
}
static bool page_same_filled(void *ptr, unsigned long *element)
{
unsigned int pos;
unsigned long *page;
unsigned long val;
page = (unsigned long *)ptr;
val = page[0];
for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) {
if (val != page[pos])
return false;
}
*element = val;
return true;
}
static ssize_t initstate_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
u32 val;
struct zram *zram = dev_to_zram(dev);
down_read(&zram->init_lock);
val = init_done(zram);
up_read(&zram->init_lock);
return scnprintf(buf, PAGE_SIZE, "%u\n", val);
}
static ssize_t disksize_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct zram *zram = dev_to_zram(dev);
return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
}
static ssize_t mem_limit_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
u64 limit;
char *tmp;
struct zram *zram = dev_to_zram(dev);
limit = memparse(buf, &tmp);
if (buf == tmp) /* no chars parsed, invalid input */
return -EINVAL;
down_write(&zram->init_lock);
zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
up_write(&zram->init_lock);
return len;
}
static ssize_t mem_used_max_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
int err;
unsigned long val;
struct zram *zram = dev_to_zram(dev);
err = kstrtoul(buf, 10, &val);
if (err || val != 0)
return -EINVAL;
down_read(&zram->init_lock);
if (init_done(zram)) {
atomic_long_set(&zram->stats.max_used_pages,
zs_get_total_pages(zram->mem_pool));
}
up_read(&zram->init_lock);
return len;
}
#ifdef CONFIG_ZRAM_WRITEBACK
static bool zram_wb_enabled(struct zram *zram)
{
return zram->backing_dev;
}
static void reset_bdev(struct zram *zram)
{
struct block_device *bdev;
if (!zram_wb_enabled(zram))
return;
bdev = zram->bdev;
if (zram->old_block_size)
set_blocksize(bdev, zram->old_block_size);
blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
/* hope filp_close flush all of IO */
filp_close(zram->backing_dev, NULL);
zram->backing_dev = NULL;
zram->old_block_size = 0;
zram->bdev = NULL;
zram->disk->queue->backing_dev_info->capabilities |=
BDI_CAP_SYNCHRONOUS_IO;
kvfree(zram->bitmap);
zram->bitmap = NULL;
}
static ssize_t backing_dev_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct zram *zram = dev_to_zram(dev);
struct file *file = zram->backing_dev;
char *p;
ssize_t ret;
down_read(&zram->init_lock);
if (!zram_wb_enabled(zram)) {
memcpy(buf, "none\n", 5);
up_read(&zram->init_lock);
return 5;
}
p = file_path(file, buf, PAGE_SIZE - 1);
if (IS_ERR(p)) {
ret = PTR_ERR(p);
goto out;
}
ret = strlen(p);
memmove(buf, p, ret);
buf[ret++] = '\n';
out:
up_read(&zram->init_lock);
return ret;
}
static ssize_t backing_dev_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
char *file_name;
size_t sz;
struct file *backing_dev = NULL;
struct inode *inode;
struct address_space *mapping;
unsigned int bitmap_sz, old_block_size = 0;
unsigned long nr_pages, *bitmap = NULL;
struct block_device *bdev = NULL;
int err;
struct zram *zram = dev_to_zram(dev);
file_name = kmalloc(PATH_MAX, GFP_KERNEL);
if (!file_name)
return -ENOMEM;
down_write(&zram->init_lock);
if (init_done(zram)) {
pr_info("Can't setup backing device for initialized device\n");
err = -EBUSY;
goto out;
}
strlcpy(file_name, buf, PATH_MAX);
/* ignore trailing newline */
sz = strlen(file_name);
if (sz > 0 && file_name[sz - 1] == '\n')
file_name[sz - 1] = 0x00;
backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
if (IS_ERR(backing_dev)) {
err = PTR_ERR(backing_dev);
backing_dev = NULL;
goto out;
}
mapping = backing_dev->f_mapping;
inode = mapping->host;
/* Support only block device in this moment */
if (!S_ISBLK(inode->i_mode)) {
err = -ENOTBLK;
goto out;
}
bdev = bdgrab(I_BDEV(inode));
err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
if (err < 0)
goto out;
nr_pages = i_size_read(inode) >> PAGE_SHIFT;
bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
if (!bitmap) {
err = -ENOMEM;
goto out;
}
old_block_size = block_size(bdev);
err = set_blocksize(bdev, PAGE_SIZE);
if (err)
goto out;
reset_bdev(zram);
spin_lock_init(&zram->bitmap_lock);
zram->old_block_size = old_block_size;
zram->bdev = bdev;
zram->backing_dev = backing_dev;
zram->bitmap = bitmap;
zram->nr_pages = nr_pages;
/*
* With writeback feature, zram does asynchronous IO so it's no longer
* synchronous device so let's remove synchronous io flag. Othewise,
* upper layer(e.g., swap) could wait IO completion rather than
* (submit and return), which will cause system sluggish.
* Furthermore, when the IO function returns(e.g., swap_readpage),
* upper layer expects IO was done so it could deallocate the page
* freely but in fact, IO is going on so finally could cause
* use-after-free when the IO is really done.
*/
zram->disk->queue->backing_dev_info->capabilities &=
~BDI_CAP_SYNCHRONOUS_IO;
up_write(&zram->init_lock);
pr_info("setup backing device %s\n", file_name);
kfree(file_name);
return len;
out:
if (bitmap)
kvfree(bitmap);
if (bdev)
blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
if (backing_dev)
filp_close(backing_dev, NULL);
up_write(&zram->init_lock);
kfree(file_name);
return err;
}
static unsigned long get_entry_bdev(struct zram *zram)
{
unsigned long entry;
spin_lock(&zram->bitmap_lock);
/* skip 0 bit to confuse zram.handle = 0 */
entry = find_next_zero_bit(zram->bitmap, zram->nr_pages, 1);
if (entry == zram->nr_pages) {
spin_unlock(&zram->bitmap_lock);
return 0;
}
set_bit(entry, zram->bitmap);
spin_unlock(&zram->bitmap_lock);
return entry;
}
static void put_entry_bdev(struct zram *zram, unsigned long entry)
{
int was_set;
spin_lock(&zram->bitmap_lock);
was_set = test_and_clear_bit(entry, zram->bitmap);
spin_unlock(&zram->bitmap_lock);
WARN_ON_ONCE(!was_set);
}
static void zram_page_end_io(struct bio *bio)
{
struct page *page = bio_first_page_all(bio);
page_endio(page, op_is_write(bio_op(bio)),
blk_status_to_errno(bio->bi_status));
bio_put(bio);
}
/*
* Returns 1 if the submission is successful.
*/
static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
unsigned long entry, struct bio *parent)
{
struct bio *bio;
bio = bio_alloc(GFP_ATOMIC, 1);
if (!bio)
return -ENOMEM;
bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
bio_set_dev(bio, zram->bdev);
if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
bio_put(bio);
return -EIO;
}
if (!parent) {
bio->bi_opf = REQ_OP_READ;
bio->bi_end_io = zram_page_end_io;
} else {
bio->bi_opf = parent->bi_opf;
bio_chain(bio, parent);
}
submit_bio(bio);
return 1;
}
struct zram_work {
struct work_struct work;
struct zram *zram;
unsigned long entry;
struct bio *bio;
};
#if PAGE_SIZE != 4096
static void zram_sync_read(struct work_struct *work)
{
struct bio_vec bvec;
struct zram_work *zw = container_of(work, struct zram_work, work);
struct zram *zram = zw->zram;
unsigned long entry = zw->entry;
struct bio *bio = zw->bio;
read_from_bdev_async(zram, &bvec, entry, bio);
}
/*
* Block layer want one ->make_request_fn to be active at a time
* so if we use chained IO with parent IO in same context,
* it's a deadlock. To avoid, it, it uses worker thread context.
*/
static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
unsigned long entry, struct bio *bio)
{
struct zram_work work;
work.zram = zram;
work.entry = entry;
work.bio = bio;
INIT_WORK_ONSTACK(&work.work, zram_sync_read);
queue_work(system_unbound_wq, &work.work);
flush_work(&work.work);
destroy_work_on_stack(&work.work);
return 1;
}
#else
static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
unsigned long entry, struct bio *bio)
{
WARN_ON(1);
return -EIO;
}
#endif
static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
unsigned long entry, struct bio *parent, bool sync)
{
if (sync)
return read_from_bdev_sync(zram, bvec, entry, parent);
else
return read_from_bdev_async(zram, bvec, entry, parent);
}
static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
u32 index, struct bio *parent,
unsigned long *pentry)
{
struct bio *bio;
unsigned long entry;
bio = bio_alloc(GFP_ATOMIC, 1);
if (!bio)
return -ENOMEM;
entry = get_entry_bdev(zram);
if (!entry) {
bio_put(bio);
return -ENOSPC;
}
bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
bio_set_dev(bio, zram->bdev);
if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len,
bvec->bv_offset)) {
bio_put(bio);
put_entry_bdev(zram, entry);
return -EIO;
}
if (!parent) {
bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
bio->bi_end_io = zram_page_end_io;
} else {
bio->bi_opf = parent->bi_opf;
bio_chain(bio, parent);
}
submit_bio(bio);
*pentry = entry;
return 0;
}
static void zram_wb_clear(struct zram *zram, u32 index)
{
unsigned long entry;
zram_clear_flag(zram, index, ZRAM_WB);
entry = zram_get_element(zram, index);
zram_set_element(zram, index, 0);
put_entry_bdev(zram, entry);
}
#else
static bool zram_wb_enabled(struct zram *zram) { return false; }
static inline void reset_bdev(struct zram *zram) {};
static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
u32 index, struct bio *parent,
unsigned long *pentry)
{
return -EIO;
}
static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
unsigned long entry, struct bio *parent, bool sync)
{
return -EIO;
}
static void zram_wb_clear(struct zram *zram, u32 index) {}
#endif
#ifdef CONFIG_ZRAM_MEMORY_TRACKING
static struct dentry *zram_debugfs_root;
static void zram_debugfs_create(void)
{
zram_debugfs_root = debugfs_create_dir("zram", NULL);
}
static void zram_debugfs_destroy(void)
{
debugfs_remove_recursive(zram_debugfs_root);
}
static void zram_accessed(struct zram *zram, u32 index)
{
zram->table[index].ac_time = ktime_get_boottime();
}
static void zram_reset_access(struct zram *zram, u32 index)
{
zram->table[index].ac_time = 0;
}
static ssize_t read_block_state(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
char *kbuf;
ssize_t index, written = 0;
struct zram *zram = file->private_data;
unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
struct timespec64 ts;
kbuf = kvmalloc(count, GFP_KERNEL);
if (!kbuf)
return -ENOMEM;
down_read(&zram->init_lock);
if (!init_done(zram)) {
up_read(&zram->init_lock);
kvfree(kbuf);
return -EINVAL;
}
for (index = *ppos; index < nr_pages; index++) {
int copied;
zram_slot_lock(zram, index);
if (!zram_allocated(zram, index))
goto next;
ts = ktime_to_timespec64(zram->table[index].ac_time);
copied = snprintf(kbuf + written, count,
"%12zd %12lld.%06lu %c%c%c\n",
index, (s64)ts.tv_sec,
ts.tv_nsec / NSEC_PER_USEC,
zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.');
if (count < copied) {
zram_slot_unlock(zram, index);
break;
}
written += copied;
count -= copied;
next:
zram_slot_unlock(zram, index);
*ppos += 1;
}
up_read(&zram->init_lock);
if (copy_to_user(buf, kbuf, written))
written = -EFAULT;
kvfree(kbuf);
return written;
}
static const struct file_operations proc_zram_block_state_op = {
.open = simple_open,
.read = read_block_state,
.llseek = default_llseek,
};
static void zram_debugfs_register(struct zram *zram)
{
if (!zram_debugfs_root)
return;
zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
zram_debugfs_root);
debugfs_create_file("block_state", 0400, zram->debugfs_dir,
zram, &proc_zram_block_state_op);
}
static void zram_debugfs_unregister(struct zram *zram)
{
debugfs_remove_recursive(zram->debugfs_dir);
}
#else
static void zram_debugfs_create(void) {};
static void zram_debugfs_destroy(void) {};
static void zram_accessed(struct zram *zram, u32 index) {};
static void zram_reset_access(struct zram *zram, u32 index) {};
static void zram_debugfs_register(struct zram *zram) {};
static void zram_debugfs_unregister(struct zram *zram) {};
#endif
/*
* We switched to per-cpu streams and this attr is not needed anymore.
* However, we will keep it around for some time, because:
* a) we may revert per-cpu streams in the future
* b) it's visible to user space and we need to follow our 2 years
* retirement rule; but we already have a number of 'soon to be
* altered' attrs, so max_comp_streams need to wait for the next
* layoff cycle.
*/
static ssize_t max_comp_streams_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
}
static ssize_t max_comp_streams_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
return len;
}
static ssize_t comp_algorithm_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
size_t sz;
struct zram *zram = dev_to_zram(dev);
down_read(&zram->init_lock);
sz = zcomp_available_show(zram->compressor, buf);
up_read(&zram->init_lock);
return sz;
}
static ssize_t comp_algorithm_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
struct zram *zram = dev_to_zram(dev);
char compressor[ARRAY_SIZE(zram->compressor)];
size_t sz;
strlcpy(compressor, buf, sizeof(compressor));
/* ignore trailing newline */
sz = strlen(compressor);
if (sz > 0 && compressor[sz - 1] == '\n')
compressor[sz - 1] = 0x00;
if (!zcomp_available_algorithm(compressor))
return -EINVAL;
down_write(&zram->init_lock);
if (init_done(zram)) {
up_write(&zram->init_lock);
pr_info("Can't change algorithm for initialized device\n");
return -EBUSY;
}
strcpy(zram->compressor, compressor);
up_write(&zram->init_lock);
return len;
}
static ssize_t compact_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
struct zram *zram = dev_to_zram(dev);
down_read(&zram->init_lock);
if (!init_done(zram)) {
up_read(&zram->init_lock);
return -EINVAL;
}
zs_compact(zram->mem_pool);
up_read(&zram->init_lock);
return len;
}
static ssize_t io_stat_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct zram *zram = dev_to_zram(dev);
ssize_t ret;
down_read(&zram->init_lock);
ret = scnprintf(buf, PAGE_SIZE,
"%8llu %8llu %8llu %8llu\n",
(u64)atomic64_read(&zram->stats.failed_reads),
(u64)atomic64_read(&zram->stats.failed_writes),
(u64)atomic64_read(&zram->stats.invalid_io),
(u64)atomic64_read(&zram->stats.notify_free));
up_read(&zram->init_lock);
return ret;
}
static ssize_t mm_stat_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct zram *zram = dev_to_zram(dev);
struct zs_pool_stats pool_stats;
u64 orig_size, mem_used = 0;
long max_used;
ssize_t ret;
memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));
down_read(&zram->init_lock);
if (init_done(zram)) {
mem_used = zs_get_total_pages(zram->mem_pool);
zs_pool_stats(zram->mem_pool, &pool_stats);
}
orig_size = atomic64_read(&zram->stats.pages_stored);
max_used = atomic_long_read(&zram->stats.max_used_pages);
ret = scnprintf(buf, PAGE_SIZE,
"%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu\n",
orig_size << PAGE_SHIFT,
(u64)atomic64_read(&zram->stats.compr_data_size),
mem_used << PAGE_SHIFT,
zram->limit_pages << PAGE_SHIFT,
max_used << PAGE_SHIFT,
(u64)atomic64_read(&zram->stats.same_pages),
pool_stats.pages_compacted,
(u64)atomic64_read(&zram->stats.huge_pages));
up_read(&zram->init_lock);
return ret;
}
static ssize_t debug_stat_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
int version = 1;
struct zram *zram = dev_to_zram(dev);
ssize_t ret;
down_read(&zram->init_lock);
ret = scnprintf(buf, PAGE_SIZE,
"version: %d\n%8llu\n",
version,
(u64)atomic64_read(&zram->stats.writestall));
up_read(&zram->init_lock);
return ret;
}
static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
static DEVICE_ATTR_RO(debug_stat);
static void zram_meta_free(struct zram *zram, u64 disksize)
{
size_t num_pages = disksize >> PAGE_SHIFT;
size_t index;
/* Free all pages that are still in this zram device */
for (index = 0; index < num_pages; index++)
zram_free_page(zram, index);
zs_destroy_pool(zram->mem_pool);
vfree(zram->table);
}
static bool zram_meta_alloc(struct zram *zram, u64 disksize)
{
size_t num_pages;
num_pages = disksize >> PAGE_SHIFT;
zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
if (!zram->table)
return false;
zram->mem_pool = zs_create_pool(zram->disk->disk_name);
if (!zram->mem_pool) {
vfree(zram->table);
return false;
}
if (!huge_class_size)
huge_class_size = zs_huge_class_size(zram->mem_pool);
return true;
}
/*
* To protect concurrent access to the same index entry,
* caller should hold this table index entry's bit_spinlock to
* indicate this index entry is accessing.
*/
static void zram_free_page(struct zram *zram, size_t index)
{
unsigned long handle;
zram_reset_access(zram, index);
if (zram_test_flag(zram, index, ZRAM_HUGE)) {
zram_clear_flag(zram, index, ZRAM_HUGE);
atomic64_dec(&zram->stats.huge_pages);
}
if (zram_wb_enabled(zram) && zram_test_flag(zram, index, ZRAM_WB)) {
zram_wb_clear(zram, index);
atomic64_dec(&zram->stats.pages_stored);
return;
}
/*
* No memory is allocated for same element filled pages.
* Simply clear same page flag.
*/
if (zram_test_flag(zram, index, ZRAM_SAME)) {
zram_clear_flag(zram, index, ZRAM_SAME);
zram_set_element(zram, index, 0);
atomic64_dec(&zram->stats.same_pages);
atomic64_dec(&zram->stats.pages_stored);
return;
}
handle = zram_get_handle(zram, index);
if (!handle)
return;
zs_free(zram->mem_pool, handle);
atomic64_sub(zram_get_obj_size(zram, index),
&zram->stats.compr_data_size);
atomic64_dec(&zram->stats.pages_stored);
zram_set_handle(zram, index, 0);
zram_set_obj_size(zram, index, 0);
}
static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
struct bio *bio, bool partial_io)
{
int ret;
unsigned long handle;
unsigned int size;
void *src, *dst;
if (zram_wb_enabled(zram)) {
zram_slot_lock(zram, index);
if (zram_test_flag(zram, index, ZRAM_WB)) {
struct bio_vec bvec;
zram_slot_unlock(zram, index);
bvec.bv_page = page;
bvec.bv_len = PAGE_SIZE;
bvec.bv_offset = 0;
return read_from_bdev(zram, &bvec,
zram_get_element(zram, index),
bio, partial_io);
}
zram_slot_unlock(zram, index);
}
zram_slot_lock(zram, index);
handle = zram_get_handle(zram, index);
if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
unsigned long value;
void *mem;
value = handle ? zram_get_element(zram, index) : 0;
mem = kmap_atomic(page);
zram_fill_page(mem, PAGE_SIZE, value);
kunmap_atomic(mem);
zram_slot_unlock(zram, index);
return 0;
}
size = zram_get_obj_size(zram, index);
src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
if (size == PAGE_SIZE) {
dst = kmap_atomic(page);
memcpy(dst, src, PAGE_SIZE);
kunmap_atomic(dst);
ret = 0;
} else {
struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
dst = kmap_atomic(page);
ret = zcomp_decompress(zstrm, src, size, dst);
kunmap_atomic(dst);
zcomp_stream_put(zram->comp);
}
zs_unmap_object(zram->mem_pool, handle);
zram_slot_unlock(zram, index);
/* Should NEVER happen. Return bio error if it does. */
if (unlikely(ret))
pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
return ret;
}
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
u32 index, int offset, struct bio *bio)
{
int ret;
struct page *page;
page = bvec->bv_page;
if (is_partial_io(bvec)) {
/* Use a temporary buffer to decompress the page */
page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
if (!page)
return -ENOMEM;
}
ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec));
if (unlikely(ret))
goto out;
if (is_partial_io(bvec)) {
void *dst = kmap_atomic(bvec->bv_page);
void *src = kmap_atomic(page);
memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len);
kunmap_atomic(src);
kunmap_atomic(dst);
}
out:
if (is_partial_io(bvec))
__free_page(page);
return ret;
}
static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
u32 index, struct bio *bio)
{
int ret = 0;
unsigned long alloced_pages;
unsigned long handle = 0;
unsigned int comp_len = 0;
void *src, *dst, *mem;
struct zcomp_strm *zstrm;
struct page *page = bvec->bv_page;
unsigned long element = 0;
enum zram_pageflags flags = 0;
bool allow_wb = true;
mem = kmap_atomic(page);
if (page_same_filled(mem, &element)) {
kunmap_atomic(mem);
/* Free memory associated with this sector now. */
flags = ZRAM_SAME;
atomic64_inc(&zram->stats.same_pages);
goto out;
}
kunmap_atomic(mem);
compress_again:
zstrm = zcomp_stream_get(zram->comp);
src = kmap_atomic(page);
ret = zcomp_compress(zstrm, src, &comp_len);
kunmap_atomic(src);
if (unlikely(ret)) {
zcomp_stream_put(zram->comp);
pr_err("Compression failed! err=%d\n", ret);
zs_free(zram->mem_pool, handle);
return ret;
}
if (unlikely(comp_len >= huge_class_size)) {
comp_len = PAGE_SIZE;
if (zram_wb_enabled(zram) && allow_wb) {
zcomp_stream_put(zram->comp);
ret = write_to_bdev(zram, bvec, index, bio, &element);
if (!ret) {
flags = ZRAM_WB;
ret = 1;
goto out;
}
allow_wb = false;
goto compress_again;
}
}
/*
* handle allocation has 2 paths:
* a) fast path is executed with preemption disabled (for
* per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
* since we can't sleep;
* b) slow path enables preemption and attempts to allocate
* the page with __GFP_DIRECT_RECLAIM bit set. we have to
* put per-cpu compression stream and, thus, to re-do
* the compression once handle is allocated.
*
* if we have a 'non-null' handle here then we are coming
* from the slow path and handle has already been allocated.
*/
if (!handle)
handle = zs_malloc(zram->mem_pool, comp_len,
__GFP_KSWAPD_RECLAIM |
__GFP_NOWARN |
__GFP_HIGHMEM |
__GFP_MOVABLE);
if (!handle) {
zcomp_stream_put(zram->comp);
atomic64_inc(&zram->stats.writestall);
handle = zs_malloc(zram->mem_pool, comp_len,
GFP_NOIO | __GFP_HIGHMEM |
__GFP_MOVABLE);
if (handle)
goto compress_again;
return -ENOMEM;
}
alloced_pages = zs_get_total_pages(zram->mem_pool);
update_used_max(zram, alloced_pages);
if (zram->limit_pages && alloced_pages > zram->limit_pages) {
zcomp_stream_put(zram->comp);
zs_free(zram->mem_pool, handle);
return -ENOMEM;
}
dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);
src = zstrm->buffer;
if (comp_len == PAGE_SIZE)
src = kmap_atomic(page);
memcpy(dst, src, comp_len);
if (comp_len == PAGE_SIZE)
kunmap_atomic(src);
zcomp_stream_put(zram->comp);
zs_unmap_object(zram->mem_pool, handle);
atomic64_add(comp_len, &zram->stats.compr_data_size);
out:
/*
* Free memory associated with this sector
* before overwriting unused sectors.
*/
zram_slot_lock(zram, index);
zram_free_page(zram, index);
if (comp_len == PAGE_SIZE) {
zram_set_flag(zram, index, ZRAM_HUGE);
atomic64_inc(&zram->stats.huge_pages);
}
if (flags) {
zram_set_flag(zram, index, flags);
zram_set_element(zram, index, element);
} else {
zram_set_handle(zram, index, handle);
zram_set_obj_size(zram, index, comp_len);
}
zram_slot_unlock(zram, index);
/* Update stats */
atomic64_inc(&zram->stats.pages_stored);
return ret;
}
static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
u32 index, int offset, struct bio *bio)
{
int ret;
struct page *page = NULL;
void *src;
struct bio_vec vec;
vec = *bvec;
if (is_partial_io(bvec)) {
void *dst;
/*
* This is a partial IO. We need to read the full page
* before to write the changes.
*/
page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
if (!page)
return -ENOMEM;
ret = __zram_bvec_read(zram, page, index, bio, true);
if (ret)
goto out;
src = kmap_atomic(bvec->bv_page);
dst = kmap_atomic(page);
memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len);
kunmap_atomic(dst);
kunmap_atomic(src);
vec.bv_page = page;
vec.bv_len = PAGE_SIZE;
vec.bv_offset = 0;
}
ret = __zram_bvec_write(zram, &vec, index, bio);
out:
if (is_partial_io(bvec))
__free_page(page);
return ret;
}
/*
* zram_bio_discard - handler on discard request
* @index: physical block index in PAGE_SIZE units
* @offset: byte offset within physical block
*/
static void zram_bio_discard(struct zram *zram, u32 index,
int offset, struct bio *bio)
{
size_t n = bio->bi_iter.bi_size;
/*
* zram manages data in physical block size units. Because logical block
* size isn't identical with physical block size on some arch, we
* could get a discard request pointing to a specific offset within a
* certain physical block. Although we can handle this request by
* reading that physiclal block and decompressing and partially zeroing
* and re-compressing and then re-storing it, this isn't reasonable
* because our intent with a discard request is to save memory. So
* skipping this logical block is appropriate here.
*/
if (offset) {
if (n <= (PAGE_SIZE - offset))
return;
n -= (PAGE_SIZE - offset);
index++;
}
while (n >= PAGE_SIZE) {
zram_slot_lock(zram, index);
zram_free_page(zram, index);
zram_slot_unlock(zram, index);
atomic64_inc(&zram->stats.notify_free);
index++;
n -= PAGE_SIZE;
}
}
/*
* Returns errno if it has some problem. Otherwise return 0 or 1.
* Returns 0 if IO request was done synchronously
* Returns 1 if IO request was successfully submitted.
*/
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
int offset, unsigned int op, struct bio *bio)
{
unsigned long start_time = jiffies;
struct request_queue *q = zram->disk->queue;
int ret;
generic_start_io_acct(q, op, bvec->bv_len >> SECTOR_SHIFT,
&zram->disk->part0);
if (!op_is_write(op)) {
atomic64_inc(&zram->stats.num_reads);
ret = zram_bvec_read(zram, bvec, index, offset, bio);
flush_dcache_page(bvec->bv_page);
} else {
atomic64_inc(&zram->stats.num_writes);
ret = zram_bvec_write(zram, bvec, index, offset, bio);
}
generic_end_io_acct(q, op, &zram->disk->part0, start_time);
zram_slot_lock(zram, index);
zram_accessed(zram, index);
zram_slot_unlock(zram, index);
if (unlikely(ret < 0)) {
if (!op_is_write(op))
atomic64_inc(&zram->stats.failed_reads);
else
atomic64_inc(&zram->stats.failed_writes);
}
return ret;
}
static void __zram_make_request(struct zram *zram, struct bio *bio)
{
int offset;
u32 index;
struct bio_vec bvec;
struct bvec_iter iter;
index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
offset = (bio->bi_iter.bi_sector &
(SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
switch (bio_op(bio)) {
case REQ_OP_DISCARD:
case REQ_OP_WRITE_ZEROES:
zram_bio_discard(zram, index, offset, bio);
bio_endio(bio);
return;
default:
break;
}
bio_for_each_segment(bvec, bio, iter) {
struct bio_vec bv = bvec;
unsigned int unwritten = bvec.bv_len;
do {
bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
unwritten);
if (zram_bvec_rw(zram, &bv, index, offset,
bio_op(bio), bio) < 0)
goto out;
bv.bv_offset += bv.bv_len;
unwritten -= bv.bv_len;
update_position(&index, &offset, &bv);
} while (unwritten);
}
bio_endio(bio);
return;
out:
bio_io_error(bio);
}
/*
* Handler function for all zram I/O requests.
*/
static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
{
struct zram *zram = queue->queuedata;
if (!valid_io_request(zram, bio->bi_iter.bi_sector,
bio->bi_iter.bi_size)) {
atomic64_inc(&zram->stats.invalid_io);
goto error;
}
__zram_make_request(zram, bio);
return BLK_QC_T_NONE;
error:
bio_io_error(bio);
return BLK_QC_T_NONE;
}
static void zram_slot_free_notify(struct block_device *bdev,
unsigned long index)
{
struct zram *zram;
zram = bdev->bd_disk->private_data;
zram_slot_lock(zram, index);
zram_free_page(zram, index);
zram_slot_unlock(zram, index);
atomic64_inc(&zram->stats.notify_free);
}
static int zram_rw_page(struct block_device *bdev, sector_t sector,
struct page *page, unsigned int op)
{
int offset, ret;
u32 index;
struct zram *zram;
struct bio_vec bv;
if (PageTransHuge(page))
return -ENOTSUPP;
zram = bdev->bd_disk->private_data;
if (!valid_io_request(zram, sector, PAGE_SIZE)) {
atomic64_inc(&zram->stats.invalid_io);
ret = -EINVAL;
goto out;
}
index = sector >> SECTORS_PER_PAGE_SHIFT;
offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
bv.bv_page = page;
bv.bv_len = PAGE_SIZE;
bv.bv_offset = 0;
ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL);
out:
/*
* If I/O fails, just return error(ie, non-zero) without
* calling page_endio.
* It causes resubmit the I/O with bio request by upper functions
* of rw_page(e.g., swap_readpage, __swap_writepage) and
* bio->bi_end_io does things to handle the error
* (e.g., SetPageError, set_page_dirty and extra works).
*/
if (unlikely(ret < 0))
return ret;
switch (ret) {
case 0:
page_endio(page, op_is_write(op), 0);
break;
case 1:
ret = 0;
break;
default:
WARN_ON(1);
}
return ret;
}
static void zram_reset_device(struct zram *zram)
{
struct zcomp *comp;
u64 disksize;
down_write(&zram->init_lock);
zram->limit_pages = 0;
if (!init_done(zram)) {
up_write(&zram->init_lock);
return;
}
comp = zram->comp;
disksize = zram->disksize;
zram->disksize = 0;
set_capacity(zram->disk, 0);
part_stat_set_all(&zram->disk->part0, 0);
up_write(&zram->init_lock);
/* I/O operation under all of CPU are done so let's free */
zram_meta_free(zram, disksize);
memset(&zram->stats, 0, sizeof(zram->stats));
zcomp_destroy(comp);
reset_bdev(zram);
}
static ssize_t disksize_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
u64 disksize;
struct zcomp *comp;
struct zram *zram = dev_to_zram(dev);
int err;
disksize = memparse(buf, NULL);
if (!disksize)
return -EINVAL;
down_write(&zram->init_lock);
if (init_done(zram)) {
pr_info("Cannot change disksize for initialized device\n");
err = -EBUSY;
goto out_unlock;
}
disksize = PAGE_ALIGN(disksize);
if (!zram_meta_alloc(zram, disksize)) {
err = -ENOMEM;
goto out_unlock;
}
comp = zcomp_create(zram->compressor);
if (IS_ERR(comp)) {
pr_err("Cannot initialise %s compressing backend\n",
zram->compressor);
err = PTR_ERR(comp);
goto out_free_meta;
}
zram->comp = comp;
zram->disksize = disksize;
set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
revalidate_disk(zram->disk);
up_write(&zram->init_lock);
return len;
out_free_meta:
zram_meta_free(zram, disksize);
out_unlock:
up_write(&zram->init_lock);
return err;
}
static ssize_t reset_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
int ret;
unsigned short do_reset;
struct zram *zram;
struct block_device *bdev;
ret = kstrtou16(buf, 10, &do_reset);
if (ret)
return ret;
if (!do_reset)
return -EINVAL;
zram = dev_to_zram(dev);
bdev = bdget_disk(zram->disk, 0);
if (!bdev)
return -ENOMEM;
mutex_lock(&bdev->bd_mutex);
/* Do not reset an active device or claimed device */
if (bdev->bd_openers || zram->claim) {
mutex_unlock(&bdev->bd_mutex);
bdput(bdev);
return -EBUSY;
}
/* From now on, anyone can't open /dev/zram[0-9] */
zram->claim = true;
mutex_unlock(&bdev->bd_mutex);
/* Make sure all the pending I/O are finished */
fsync_bdev(bdev);
zram_reset_device(zram);
revalidate_disk(zram->disk);
bdput(bdev);
mutex_lock(&bdev->bd_mutex);
zram->claim = false;
mutex_unlock(&bdev->bd_mutex);
return len;
}
static int zram_open(struct block_device *bdev, fmode_t mode)
{
int ret = 0;
struct zram *zram;
WARN_ON(!mutex_is_locked(&bdev->bd_mutex));
zram = bdev->bd_disk->private_data;
/* zram was claimed to reset so open request fails */
if (zram->claim)
ret = -EBUSY;
return ret;
}
static const struct block_device_operations zram_devops = {
.open = zram_open,
.swap_slot_free_notify = zram_slot_free_notify,
.rw_page = zram_rw_page,
.owner = THIS_MODULE
};
static DEVICE_ATTR_WO(compact);
static DEVICE_ATTR_RW(disksize);
static DEVICE_ATTR_RO(initstate);
static DEVICE_ATTR_WO(reset);
static DEVICE_ATTR_WO(mem_limit);
static DEVICE_ATTR_WO(mem_used_max);
static DEVICE_ATTR_RW(max_comp_streams);
static DEVICE_ATTR_RW(comp_algorithm);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RW(backing_dev);
#endif
static struct attribute *zram_disk_attrs[] = {
&dev_attr_disksize.attr,
&dev_attr_initstate.attr,
&dev_attr_reset.attr,
&dev_attr_compact.attr,
&dev_attr_mem_limit.attr,
&dev_attr_mem_used_max.attr,
&dev_attr_max_comp_streams.attr,
&dev_attr_comp_algorithm.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
&dev_attr_backing_dev.attr,
#endif
&dev_attr_io_stat.attr,
&dev_attr_mm_stat.attr,
&dev_attr_debug_stat.attr,
NULL,
};
static const struct attribute_group zram_disk_attr_group = {
.attrs = zram_disk_attrs,
};
/*
* Allocate and initialize new zram device. the function returns
* '>= 0' device_id upon success, and negative value otherwise.
*/
static int zram_add(void)
{
struct zram *zram;
struct request_queue *queue;
int ret, device_id;
zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
if (!zram)
return -ENOMEM;
ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
if (ret < 0)
goto out_free_dev;
device_id = ret;
init_rwsem(&zram->init_lock);
queue = blk_alloc_queue(GFP_KERNEL);
if (!queue) {
pr_err("Error allocating disk queue for device %d\n",
device_id);
ret = -ENOMEM;
goto out_free_idr;
}
blk_queue_make_request(queue, zram_make_request);
/* gendisk structure */
zram->disk = alloc_disk(1);
if (!zram->disk) {
pr_err("Error allocating disk structure for device %d\n",
device_id);
ret = -ENOMEM;
goto out_free_queue;
}
zram->disk->major = zram_major;
zram->disk->first_minor = device_id;
zram->disk->fops = &zram_devops;
zram->disk->queue = queue;
zram->disk->queue->queuedata = zram;
zram->disk->private_data = zram;
snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
/* Actual capacity set using syfs (/sys/block/zram<id>/disksize */
set_capacity(zram->disk, 0);
/* zram devices sort of resembles non-rotational disks */
blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue);
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
/*
* To ensure that we always get PAGE_SIZE aligned
* and n*PAGE_SIZED sized I/O requests.
*/
blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
blk_queue_logical_block_size(zram->disk->queue,
ZRAM_LOGICAL_BLOCK_SIZE);
blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue);
/*
* zram_bio_discard() will clear all logical blocks if logical block
* size is identical with physical block size(PAGE_SIZE). But if it is
* different, we will skip discarding some parts of logical blocks in
* the part of the request range which isn't aligned to physical block
* size. So we can't ensure that all discarded logical blocks are
* zeroed.
*/
if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
zram->disk->queue->backing_dev_info->capabilities |=
(BDI_CAP_STABLE_WRITES | BDI_CAP_SYNCHRONOUS_IO);
add_disk(zram->disk);
ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj,
&zram_disk_attr_group);
if (ret < 0) {
pr_err("Error creating sysfs group for device %d\n",
device_id);
goto out_free_disk;
}
strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
zram_debugfs_register(zram);
pr_info("Added device: %s\n", zram->disk->disk_name);
return device_id;
out_free_disk:
del_gendisk(zram->disk);
put_disk(zram->disk);
out_free_queue:
blk_cleanup_queue(queue);
out_free_idr:
idr_remove(&zram_index_idr, device_id);
out_free_dev:
kfree(zram);
return ret;
}
static int zram_remove(struct zram *zram)
{
struct block_device *bdev;
bdev = bdget_disk(zram->disk, 0);
if (!bdev)
return -ENOMEM;
mutex_lock(&bdev->bd_mutex);
if (bdev->bd_openers || zram->claim) {
mutex_unlock(&bdev->bd_mutex);
bdput(bdev);
return -EBUSY;
}
zram->claim = true;
mutex_unlock(&bdev->bd_mutex);
zram_debugfs_unregister(zram);
/*
* Remove sysfs first, so no one will perform a disksize
* store while we destroy the devices. This also helps during
* hot_remove -- zram_reset_device() is the last holder of
* ->init_lock, no later/concurrent disksize_store() or any
* other sysfs handlers are possible.
*/
sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
&zram_disk_attr_group);
/* Make sure all the pending I/O are finished */
fsync_bdev(bdev);
zram_reset_device(zram);
bdput(bdev);
pr_info("Removed device: %s\n", zram->disk->disk_name);
del_gendisk(zram->disk);
blk_cleanup_queue(zram->disk->queue);
put_disk(zram->disk);
kfree(zram);
return 0;
}
/* zram-control sysfs attributes */
/*
* NOTE: hot_add attribute is not the usual read-only sysfs attribute. In a
* sense that reading from this file does alter the state of your system -- it
* creates a new un-initialized zram device and returns back this device's
* device_id (or an error code if it fails to create a new device).
*/
static ssize_t hot_add_show(struct class *class,
struct class_attribute *attr,
char *buf)
{
int ret;
mutex_lock(&zram_index_mutex);
ret = zram_add();
mutex_unlock(&zram_index_mutex);
if (ret < 0)
return ret;
return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
}
static CLASS_ATTR_RO(hot_add);
static ssize_t hot_remove_store(struct class *class,
struct class_attribute *attr,
const char *buf,
size_t count)
{
struct zram *zram;
int ret, dev_id;
/* dev_id is gendisk->first_minor, which is `int' */
ret = kstrtoint(buf, 10, &dev_id);
if (ret)
return ret;
if (dev_id < 0)
return -EINVAL;
mutex_lock(&zram_index_mutex);
zram = idr_find(&zram_index_idr, dev_id);
if (zram) {
ret = zram_remove(zram);
if (!ret)
idr_remove(&zram_index_idr, dev_id);
} else {
ret = -ENODEV;
}
mutex_unlock(&zram_index_mutex);
return ret ? ret : count;
}
static CLASS_ATTR_WO(hot_remove);
static struct attribute *zram_control_class_attrs[] = {
&class_attr_hot_add.attr,
&class_attr_hot_remove.attr,
NULL,
};
ATTRIBUTE_GROUPS(zram_control_class);
static struct class zram_control_class = {
.name = "zram-control",
.owner = THIS_MODULE,
.class_groups = zram_control_class_groups,
};
static int zram_remove_cb(int id, void *ptr, void *data)
{
zram_remove(ptr);
return 0;
}
static void destroy_devices(void)
{
class_unregister(&zram_control_class);
idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
zram_debugfs_destroy();
idr_destroy(&zram_index_idr);
unregister_blkdev(zram_major, "zram");
cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
}
static int __init zram_init(void)
{
int ret;
ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
zcomp_cpu_up_prepare, zcomp_cpu_dead);
if (ret < 0)
return ret;
ret = class_register(&zram_control_class);
if (ret) {
pr_err("Unable to register zram-control class\n");
cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
return ret;
}
zram_debugfs_create();
zram_major = register_blkdev(0, "zram");
if (zram_major <= 0) {
pr_err("Unable to get major number\n");
class_unregister(&zram_control_class);
cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
return -EBUSY;
}
while (num_devices != 0) {
mutex_lock(&zram_index_mutex);
ret = zram_add();
mutex_unlock(&zram_index_mutex);
if (ret < 0)
goto out_error;
num_devices--;
}
return 0;
out_error:
destroy_devices();
return ret;
}
static void __exit zram_exit(void)
{
destroy_devices();
}
module_init(zram_init);
module_exit(zram_exit);
module_param(num_devices, uint, 0);
MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
MODULE_DESCRIPTION("Compressed RAM Block Device");