diff --git a/MAINTAINERS b/MAINTAINERS index 7578deb8ff20..51ebb779c5f3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9960,7 +9960,7 @@ F: drivers/net/hamradio/*scc.c F: drivers/net/hamradio/z8530.h ZBUD COMPRESSED PAGE ALLOCATOR -M: Seth Jennings +M: Seth Jennings L: linux-mm@kvack.org S: Maintained F: mm/zbud.c @@ -10005,7 +10005,7 @@ F: mm/zsmalloc.c F: include/linux/zsmalloc.h ZSWAP COMPRESSED SWAP CACHING -M: Seth Jennings +M: Seth Jennings L: linux-mm@kvack.org S: Maintained F: mm/zswap.c diff --git a/drivers/char/agp/frontend.c b/drivers/char/agp/frontend.c index 8121b4c70ede..b29703324e94 100644 --- a/drivers/char/agp/frontend.c +++ b/drivers/char/agp/frontend.c @@ -730,6 +730,7 @@ static int agpioc_info_wrap(struct agp_file_private *priv, void __user *arg) agp_copy_info(agp_bridge, &kerninfo); + memset(&userinfo, 0, sizeof(userinfo)); userinfo.version.major = kerninfo.version.major; userinfo.version.minor = kerninfo.version.minor; userinfo.bridge_id = kerninfo.device->vendor | diff --git a/drivers/rtc/rtc-pcf8523.c b/drivers/rtc/rtc-pcf8523.c index 5c8f8226c848..4cdb64be061b 100644 --- a/drivers/rtc/rtc-pcf8523.c +++ b/drivers/rtc/rtc-pcf8523.c @@ -206,7 +206,7 @@ static int pcf8523_rtc_read_time(struct device *dev, struct rtc_time *tm) tm->tm_hour = bcd2bin(regs[2] & 0x3f); tm->tm_mday = bcd2bin(regs[3] & 0x3f); tm->tm_wday = regs[4] & 0x7; - tm->tm_mon = bcd2bin(regs[5] & 0x1f); + tm->tm_mon = bcd2bin(regs[5] & 0x1f) - 1; tm->tm_year = bcd2bin(regs[6]) + 100; return rtc_valid_tm(tm); @@ -229,7 +229,7 @@ static int pcf8523_rtc_set_time(struct device *dev, struct rtc_time *tm) regs[3] = bin2bcd(tm->tm_hour); regs[4] = bin2bcd(tm->tm_mday); regs[5] = tm->tm_wday; - regs[6] = bin2bcd(tm->tm_mon); + regs[6] = bin2bcd(tm->tm_mon + 1); regs[7] = bin2bcd(tm->tm_year - 100); msg.addr = client->addr; diff --git a/fs/affs/super.c b/fs/affs/super.c index 6d589f28bf9b..895ac7dc9dbf 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -340,8 +340,6 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) &blocksize,&sbi->s_prefix, sbi->s_volume, &mount_flags)) { printk(KERN_ERR "AFFS: Error parsing options\n"); - kfree(sbi->s_prefix); - kfree(sbi); return -EINVAL; } /* N.B. after this point s_prefix must be released */ diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 2caf36ac3e93..cc87c1abac97 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -179,7 +179,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry) spin_lock(&active->d_lock); /* Already gone? */ - if (!d_count(active)) + if ((int) d_count(active) <= 0) goto next; qstr = &active->d_name; @@ -230,7 +230,7 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry) spin_lock(&expiring->d_lock); - /* Bad luck, we've already been dentry_iput */ + /* We've already been dentry_iput or unlinked */ if (!expiring->d_inode) goto next; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 204027520937..e19d4c0cacae 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -1030,6 +1030,11 @@ static int __init init_hugetlbfs_fs(void) int error; int i; + if (!hugepages_supported()) { + pr_info("hugetlbfs: disabling because there are no supported hugepage sizes\n"); + return -ENOTSUPP; + } + error = bdi_init(&hugetlbfs_backing_dev_info); if (error) return error; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 4e565c814309..732648b270dc 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -698,6 +698,8 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) } group->overflow_event = &oevent->fse; + if (force_o_largefile()) + event_f_flags |= O_LARGEFILE; group->fanotify_data.f_flags = event_f_flags; #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS spin_lock_init(&group->fanotify_data.access_lock); diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 5b337cf8fb86..b65166de1d9d 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -412,6 +412,16 @@ static inline spinlock_t *huge_pte_lockptr(struct hstate *h, return &mm->page_table_lock; } +static inline bool hugepages_supported(void) +{ + /* + * Some platform decide whether they support huge pages at boot + * time. On these, such as powerpc, HPAGE_SHIFT is set to 0 when + * there is no such support + */ + return HPAGE_SHIFT != 0; +} + #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; #define alloc_huge_page_node(h, nid) NULL diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index f2f7398848cf..d82abd40a3c0 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -101,4 +101,13 @@ struct kmem_cache { struct kmem_cache_node *node[MAX_NUMNODES]; }; +#ifdef CONFIG_SYSFS +#define SLAB_SUPPORTS_SYSFS +void sysfs_slab_remove(struct kmem_cache *); +#else +static inline void sysfs_slab_remove(struct kmem_cache *s) +{ +} +#endif + #endif /* _LINUX_SLUB_DEF_H */ diff --git a/mm/compaction.c b/mm/compaction.c index 37f976287068..627dc2e4320f 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -671,16 +671,20 @@ static void isolate_freepages(struct zone *zone, struct compact_control *cc) { struct page *page; - unsigned long high_pfn, low_pfn, pfn, z_end_pfn, end_pfn; + unsigned long high_pfn, low_pfn, pfn, z_end_pfn; int nr_freepages = cc->nr_freepages; struct list_head *freelist = &cc->freepages; /* * Initialise the free scanner. The starting point is where we last - * scanned from (or the end of the zone if starting). The low point - * is the end of the pageblock the migration scanner is using. + * successfully isolated from, zone-cached value, or the end of the + * zone when isolating for the first time. We need this aligned to + * the pageblock boundary, because we do pfn -= pageblock_nr_pages + * in the for loop. + * The low boundary is the end of the pageblock the migration scanner + * is using. */ - pfn = cc->free_pfn; + pfn = cc->free_pfn & ~(pageblock_nr_pages-1); low_pfn = ALIGN(cc->migrate_pfn + 1, pageblock_nr_pages); /* @@ -700,6 +704,7 @@ static void isolate_freepages(struct zone *zone, for (; pfn >= low_pfn && cc->nr_migratepages > nr_freepages; pfn -= pageblock_nr_pages) { unsigned long isolated; + unsigned long end_pfn; /* * This can iterate a massively long zone without finding any @@ -734,13 +739,10 @@ static void isolate_freepages(struct zone *zone, isolated = 0; /* - * As pfn may not start aligned, pfn+pageblock_nr_page - * may cross a MAX_ORDER_NR_PAGES boundary and miss - * a pfn_valid check. Ensure isolate_freepages_block() - * only scans within a pageblock + * Take care when isolating in last pageblock of a zone which + * ends in the middle of a pageblock. */ - end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); - end_pfn = min(end_pfn, z_end_pfn); + end_pfn = min(pfn + pageblock_nr_pages, z_end_pfn); isolated = isolate_freepages_block(cc, pfn, end_pfn, freelist, false); nr_freepages += isolated; diff --git a/mm/filemap.c b/mm/filemap.c index 5020b280a771..000a220e2a41 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -906,8 +906,8 @@ EXPORT_SYMBOL(page_cache_prev_hole); * Looks up the page cache slot at @mapping & @offset. If there is a * page cache page, it is returned with an increased refcount. * - * If the slot holds a shadow entry of a previously evicted page, it - * is returned. + * If the slot holds a shadow entry of a previously evicted page, or a + * swap entry from shmem/tmpfs, it is returned. * * Otherwise, %NULL is returned. */ @@ -928,9 +928,9 @@ struct page *find_get_entry(struct address_space *mapping, pgoff_t offset) if (radix_tree_deref_retry(page)) goto repeat; /* - * Otherwise, shmem/tmpfs must be storing a swap entry - * here as an exceptional entry: so return it without - * attempting to raise page count. + * A shadow entry of a recently evicted page, + * or a swap entry from shmem/tmpfs. Return + * it without attempting to raise page count. */ goto out; } @@ -983,8 +983,8 @@ EXPORT_SYMBOL(find_get_page); * page cache page, it is returned locked and with an increased * refcount. * - * If the slot holds a shadow entry of a previously evicted page, it - * is returned. + * If the slot holds a shadow entry of a previously evicted page, or a + * swap entry from shmem/tmpfs, it is returned. * * Otherwise, %NULL is returned. * @@ -1099,8 +1099,8 @@ EXPORT_SYMBOL(find_or_create_page); * with ascending indexes. There may be holes in the indices due to * not-present pages. * - * Any shadow entries of evicted pages are included in the returned - * array. + * Any shadow entries of evicted pages, or swap entries from + * shmem/tmpfs, are included in the returned array. * * find_get_entries() returns the number of pages and shadow entries * which were found. @@ -1128,9 +1128,9 @@ unsigned find_get_entries(struct address_space *mapping, if (radix_tree_deref_retry(page)) goto restart; /* - * Otherwise, we must be storing a swap entry - * here as an exceptional entry: so return it - * without attempting to raise page count. + * A shadow entry of a recently evicted page, + * or a swap entry from shmem/tmpfs. Return + * it without attempting to raise page count. */ goto export; } @@ -1198,9 +1198,9 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start, goto restart; } /* - * Otherwise, shmem/tmpfs must be storing a swap entry - * here as an exceptional entry: so skip over it - - * we only reach this from invalidate_mapping_pages(). + * A shadow entry of a recently evicted page, + * or a swap entry from shmem/tmpfs. Skip + * over it. */ continue; } @@ -1265,9 +1265,9 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index, goto restart; } /* - * Otherwise, shmem/tmpfs must be storing a swap entry - * here as an exceptional entry: so stop looking for - * contiguous pages. + * A shadow entry of a recently evicted page, + * or a swap entry from shmem/tmpfs. Stop + * looking for contiguous pages. */ break; } @@ -1341,10 +1341,17 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, goto restart; } /* - * This function is never used on a shmem/tmpfs - * mapping, so a swap entry won't be found here. + * A shadow entry of a recently evicted page. + * + * Those entries should never be tagged, but + * this tree walk is lockless and the tags are + * looked up in bulk, one radix tree node at a + * time, so there is a sizable window for page + * reclaim to evict a page we saw tagged. + * + * Skip over it. */ - BUG(); + continue; } if (!page_cache_get_speculative(page)) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 246192929a2d..c82290b9c1fc 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1981,11 +1981,7 @@ static int __init hugetlb_init(void) { int i; - /* Some platform decide whether they support huge pages at boot - * time. On these, such as powerpc, HPAGE_SHIFT is set to 0 when - * there is no such support - */ - if (HPAGE_SHIFT == 0) + if (!hugepages_supported()) return 0; if (!size_to_hstate(default_hstate_size)) { @@ -2112,6 +2108,9 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy, unsigned long tmp; int ret; + if (!hugepages_supported()) + return -ENOTSUPP; + tmp = h->max_huge_pages; if (write && h->order >= MAX_ORDER) @@ -2165,6 +2164,9 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write, unsigned long tmp; int ret; + if (!hugepages_supported()) + return -ENOTSUPP; + tmp = h->nr_overcommit_huge_pages; if (write && h->order >= MAX_ORDER) @@ -2190,6 +2192,8 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write, void hugetlb_report_meminfo(struct seq_file *m) { struct hstate *h = &default_hstate; + if (!hugepages_supported()) + return; seq_printf(m, "HugePages_Total: %5lu\n" "HugePages_Free: %5lu\n" @@ -2206,6 +2210,8 @@ void hugetlb_report_meminfo(struct seq_file *m) int hugetlb_report_node_meminfo(int nid, char *buf) { struct hstate *h = &default_hstate; + if (!hugepages_supported()) + return 0; return sprintf(buf, "Node %d HugePages_Total: %5u\n" "Node %d HugePages_Free: %5u\n" @@ -2220,6 +2226,9 @@ void hugetlb_show_meminfo(void) struct hstate *h; int nid; + if (!hugepages_supported()) + return; + for_each_node_state(nid, N_MEMORY) for_each_hstate(h) pr_info("Node %d hugepages_total=%u hugepages_free=%u hugepages_surp=%u hugepages_size=%lukB\n", diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 29501f040568..c47dffdcb246 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6686,16 +6686,20 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma, pgoff = pte_to_pgoff(ptent); /* page is moved even if it's not RSS of this task(page-faulted). */ - page = find_get_page(mapping, pgoff); - #ifdef CONFIG_SWAP /* shmem/tmpfs may report page out on swap: account for that too. */ - if (radix_tree_exceptional_entry(page)) { - swp_entry_t swap = radix_to_swp_entry(page); - if (do_swap_account) - *entry = swap; - page = find_get_page(swap_address_space(swap), swap.val); - } + if (shmem_mapping(mapping)) { + page = find_get_entry(mapping, pgoff); + if (radix_tree_exceptional_entry(page)) { + swp_entry_t swp = radix_to_swp_entry(page); + if (do_swap_account) + *entry = swp; + page = find_get_page(swap_address_space(swp), swp.val); + } + } else + page = find_get_page(mapping, pgoff); +#else + page = find_get_page(mapping, pgoff); #endif return page; } diff --git a/mm/page-writeback.c b/mm/page-writeback.c index ef413492a149..a4317da60532 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -593,14 +593,14 @@ unsigned long bdi_dirty_limit(struct backing_dev_info *bdi, unsigned long dirty) * (5) the closer to setpoint, the smaller |df/dx| (and the reverse) * => fast response on large errors; small oscillation near setpoint */ -static inline long long pos_ratio_polynom(unsigned long setpoint, +static long long pos_ratio_polynom(unsigned long setpoint, unsigned long dirty, unsigned long limit) { long long pos_ratio; long x; - x = div_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT, + x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT, limit - setpoint + 1); pos_ratio = x; pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT; @@ -842,7 +842,7 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi, x_intercept = bdi_setpoint + span; if (bdi_dirty < x_intercept - span / 4) { - pos_ratio = div_u64(pos_ratio * (x_intercept - bdi_dirty), + pos_ratio = div64_u64(pos_ratio * (x_intercept - bdi_dirty), x_intercept - bdi_setpoint + 1); } else pos_ratio /= 4; diff --git a/mm/slab.h b/mm/slab.h index 3045316b7c9d..6bd4c353704f 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -91,6 +91,7 @@ __kmem_cache_alias(const char *name, size_t size, size_t align, #define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS) int __kmem_cache_shutdown(struct kmem_cache *); +void slab_kmem_cache_release(struct kmem_cache *); struct seq_file; struct file; diff --git a/mm/slab_common.c b/mm/slab_common.c index f3cfccf76dda..102cc6fca3d3 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -323,6 +323,12 @@ static int kmem_cache_destroy_memcg_children(struct kmem_cache *s) } #endif /* CONFIG_MEMCG_KMEM */ +void slab_kmem_cache_release(struct kmem_cache *s) +{ + kfree(s->name); + kmem_cache_free(kmem_cache, s); +} + void kmem_cache_destroy(struct kmem_cache *s) { get_online_cpus(); @@ -352,8 +358,11 @@ void kmem_cache_destroy(struct kmem_cache *s) rcu_barrier(); memcg_free_cache_params(s); - kfree(s->name); - kmem_cache_free(kmem_cache, s); +#ifdef SLAB_SUPPORTS_SYSFS + sysfs_slab_remove(s); +#else + slab_kmem_cache_release(s); +#endif goto out_put_cpus; out_unlock: diff --git a/mm/slub.c b/mm/slub.c index 5e234f1f8853..2b1ce697fc4b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -210,14 +210,11 @@ enum track_item { TRACK_ALLOC, TRACK_FREE }; #ifdef CONFIG_SYSFS static int sysfs_slab_add(struct kmem_cache *); static int sysfs_slab_alias(struct kmem_cache *, const char *); -static void sysfs_slab_remove(struct kmem_cache *); static void memcg_propagate_slab_attrs(struct kmem_cache *s); #else static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; } static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p) { return 0; } -static inline void sysfs_slab_remove(struct kmem_cache *s) { } - static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { } #endif @@ -3238,24 +3235,7 @@ static inline int kmem_cache_close(struct kmem_cache *s) int __kmem_cache_shutdown(struct kmem_cache *s) { - int rc = kmem_cache_close(s); - - if (!rc) { - /* - * Since slab_attr_store may take the slab_mutex, we should - * release the lock while removing the sysfs entry in order to - * avoid a deadlock. Because this is pretty much the last - * operation we do and the lock will be released shortly after - * that in slab_common.c, we could just move sysfs_slab_remove - * to a later point in common code. We should do that when we - * have a common sysfs framework for all allocators. - */ - mutex_unlock(&slab_mutex); - sysfs_slab_remove(s); - mutex_lock(&slab_mutex); - } - - return rc; + return kmem_cache_close(s); } /******************************************************************** @@ -5071,15 +5051,18 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s) #ifdef CONFIG_MEMCG_KMEM int i; char *buffer = NULL; + struct kmem_cache *root_cache; - if (!is_root_cache(s)) + if (is_root_cache(s)) return; + root_cache = s->memcg_params->root_cache; + /* * This mean this cache had no attribute written. Therefore, no point * in copying default values around */ - if (!s->max_attr_size) + if (!root_cache->max_attr_size) return; for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) { @@ -5101,7 +5084,7 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s) */ if (buffer) buf = buffer; - else if (s->max_attr_size < ARRAY_SIZE(mbuf)) + else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf)) buf = mbuf; else { buffer = (char *) get_zeroed_page(GFP_KERNEL); @@ -5110,7 +5093,7 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s) buf = buffer; } - attr->show(s->memcg_params->root_cache, buf); + attr->show(root_cache, buf); attr->store(s, buf, strlen(buf)); } @@ -5119,6 +5102,11 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s) #endif } +static void kmem_cache_release(struct kobject *k) +{ + slab_kmem_cache_release(to_slab(k)); +} + static const struct sysfs_ops slab_sysfs_ops = { .show = slab_attr_show, .store = slab_attr_store, @@ -5126,6 +5114,7 @@ static const struct sysfs_ops slab_sysfs_ops = { static struct kobj_type slab_ktype = { .sysfs_ops = &slab_sysfs_ops, + .release = kmem_cache_release, }; static int uevent_filter(struct kset *kset, struct kobject *kobj) @@ -5252,7 +5241,7 @@ static int sysfs_slab_add(struct kmem_cache *s) goto out; } -static void sysfs_slab_remove(struct kmem_cache *s) +void sysfs_slab_remove(struct kmem_cache *s) { if (slab_state < FULL) /* diff --git a/mm/truncate.c b/mm/truncate.c index e5cc39ab0751..6a78c814bebf 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -484,14 +484,6 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, unsigned long count = 0; int i; - /* - * Note: this function may get called on a shmem/tmpfs mapping: - * pagevec_lookup() might then return 0 prematurely (because it - * got a gangful of swap entries); but it's hardly worth worrying - * about - it can rarely have anything to free from such a mapping - * (most pages are dirty), and already skips over any difficulties. - */ - pagevec_init(&pvec, 0); while (index <= end && pagevec_lookup_entries(&pvec, mapping, index, min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, diff --git a/mm/vmscan.c b/mm/vmscan.c index 3f56c8deb3c0..32c661d66a45 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1915,6 +1915,24 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, file = get_lru_size(lruvec, LRU_ACTIVE_FILE) + get_lru_size(lruvec, LRU_INACTIVE_FILE); + /* + * Prevent the reclaimer from falling into the cache trap: as + * cache pages start out inactive, every cache fault will tip + * the scan balance towards the file LRU. And as the file LRU + * shrinks, so does the window for rotation from references. + * This means we have a runaway feedback loop where a tiny + * thrashing file LRU becomes infinitely more attractive than + * anon pages. Try to detect this based on file LRU size. + */ + if (global_reclaim(sc)) { + unsigned long free = zone_page_state(zone, NR_FREE_PAGES); + + if (unlikely(file + free <= high_wmark_pages(zone))) { + scan_balance = SCAN_ANON; + goto out; + } + } + /* * There is enough inactive page cache, do not reclaim * anything from the anonymous working set right now.