diff --git a/lib/libbe/be.c b/lib/libbe/be.c index 13f7a59d5215..cc6b8533a8f9 100644 --- a/lib/libbe/be.c +++ b/lib/libbe/be.c @@ -318,7 +318,7 @@ be_promote_dependent_clones(zfs_handle_t *zfs_hdl, struct be_destroy_data *bdd) struct promote_entry *entry; snprintf(bdd->target_name, BE_MAXPATHLEN, "%s/", zfs_get_name(zfs_hdl)); - err = zfs_iter_dependents(zfs_hdl, true, be_dependent_clone_cb, bdd); + err = zfs_iter_dependents(zfs_hdl, 0, true, be_dependent_clone_cb, bdd); /* * Drain the list and walk away from it if we're only deleting a @@ -360,13 +360,13 @@ be_destroy_cb(zfs_handle_t *zfs_hdl, void *data) bdd = (struct be_destroy_data *)data; if (bdd->snapname == NULL) { - err = zfs_iter_children(zfs_hdl, be_destroy_cb, data); + err = zfs_iter_children(zfs_hdl, 0, be_destroy_cb, data); if (err != 0) return (err); return (zfs_destroy(zfs_hdl, false)); } /* If we're dealing with snapshots instead, delete that one alone */ - err = zfs_iter_filesystems(zfs_hdl, be_destroy_cb, data); + err = zfs_iter_filesystems(zfs_hdl, 0, be_destroy_cb, data); if (err != 0) return (err); /* @@ -777,7 +777,7 @@ be_clone_cb(zfs_handle_t *ds, void *data) if (ldc->depth_limit == -1 || ldc->depth < ldc->depth_limit) { ldc->depth++; - err = zfs_iter_filesystems(ds, be_clone_cb, ldc); + err = zfs_iter_filesystems(ds, 0, be_clone_cb, ldc); ldc->depth--; } diff --git a/lib/libbe/be_access.c b/lib/libbe/be_access.c index ec966bc85fb7..e6f526de1aca 100644 --- a/lib/libbe/be_access.c +++ b/lib/libbe/be_access.c @@ -141,7 +141,7 @@ be_mount_iter(zfs_handle_t *zfs_hdl, void *data) skipmount: ++info->depth; - err = zfs_iter_filesystems(zfs_hdl, be_mount_iter, info); + err = zfs_iter_filesystems(zfs_hdl, 0, be_mount_iter, info); --info->depth; return (err); } @@ -158,7 +158,7 @@ be_umount_iter(zfs_handle_t *zfs_hdl, void *data) info = (struct be_mount_info *)data; ++info->depth; - if((err = zfs_iter_filesystems(zfs_hdl, be_umount_iter, info)) != 0) { + if((err = zfs_iter_filesystems(zfs_hdl, 0, 
be_umount_iter, info)) != 0) { return (err); } --info->depth; @@ -205,7 +205,7 @@ be_mounted_at(libbe_handle_t *lbh, const char *path, nvlist_t *details) info.path = path; info.name = NULL; - zfs_iter_filesystems(root_hdl, be_mountcheck_cb, &info); + zfs_iter_filesystems(root_hdl, 0, be_mountcheck_cb, &info); zfs_close(root_hdl); if (info.name != NULL) { diff --git a/lib/libbe/be_info.c b/lib/libbe/be_info.c index 81f3d46a7208..c29b4ba06922 100644 --- a/lib/libbe/be_info.c +++ b/lib/libbe/be_info.c @@ -258,7 +258,7 @@ be_proplist_update(prop_data_t *data) &data->bootonce); /* XXX TODO: some error checking here */ - zfs_iter_filesystems(root_hdl, prop_list_builder_cb, data); + zfs_iter_filesystems(root_hdl, 0, prop_list_builder_cb, data); zfs_close(root_hdl); @@ -269,7 +269,7 @@ static int snapshot_proplist_update(zfs_handle_t *hdl, prop_data_t *data) { - return (zfs_iter_snapshots_sorted(hdl, prop_list_builder_cb, data, + return (zfs_iter_snapshots_sorted(hdl, 0, prop_list_builder_cb, data, 0, 0)); } diff --git a/sys/contrib/openzfs/.github/workflows/build-dependencies.txt b/sys/contrib/openzfs/.github/workflows/build-dependencies.txt index 482d82fff17c..73921865c42a 100644 --- a/sys/contrib/openzfs/.github/workflows/build-dependencies.txt +++ b/sys/contrib/openzfs/.github/workflows/build-dependencies.txt @@ -6,6 +6,9 @@ bc build-essential curl dbench +debhelper-compat +dh-python +dkms fakeroot fio gdb @@ -33,12 +36,15 @@ mdadm nfs-kernel-server pamtester parted +po-debconf python3 +python3-all-dev python3-cffi python3-dev python3-packaging python3-pip python3-setuptools +python3-sphinx rng-tools-debian rsync samba diff --git a/sys/contrib/openzfs/.github/workflows/zfs-tests-functional.yml b/sys/contrib/openzfs/.github/workflows/zfs-tests-functional.yml index 69ca539b7bb6..08ce254ec8ad 100644 --- a/sys/contrib/openzfs/.github/workflows/zfs-tests-functional.yml +++ b/sys/contrib/openzfs/.github/workflows/zfs-tests-functional.yml @@ -15,9 +15,6 @@ jobs: - uses: 
actions/checkout@v3 with: ref: ${{ github.event.pull_request.head.sha }} - - name: Reclaim disk space - run: | - ${{ github.workspace }}/.github/workflows/scripts/reclaim_disk_space.sh - name: Install dependencies run: | sudo apt-get update @@ -32,15 +29,18 @@ jobs: ./configure --enable-debug --enable-debuginfo --enable-asan --enable-ubsan - name: Make run: | - make -j$(nproc) --no-print-directory --silent pkg-utils pkg-kmod + make --no-print-directory --silent native-deb-utils native-deb-kmod + mv ../*.deb . + rm ./openzfs-zfs-dkms*.deb ./openzfs-zfs-dracut*.deb - name: Install run: | - sudo dpkg -i *.deb # Update order of directories to search for modules, otherwise # Ubuntu will load kernel-shipped ones. sudo sed -i.bak 's/updates/extra updates/' /etc/depmod.d/ubuntu.conf - sudo depmod - sudo modprobe zfs + sudo dpkg -i *.deb + # Native Debian packages enable and start the services + # Stop zfs-zed daemon, as it may interfere with some ZTS test cases + sudo systemctl stop zfs-zed # Workaround for cloud-init bug # see https://github.com/openzfs/zfs/issues/12644 FILE=/lib/udev/rules.d/10-cloud-init-hook-hotplug.rules @@ -55,8 +55,9 @@ jobs: - name: Clear the kernel ring buffer run: | sudo dmesg -c >/var/tmp/dmesg-prerun - - name: Report disk space + - name: Reclaim and report disk space run: | + ${{ github.workspace }}/.github/workflows/scripts/reclaim_disk_space.sh df -h / - name: Tests run: | diff --git a/sys/contrib/openzfs/.github/workflows/zfs-tests-sanity.yml b/sys/contrib/openzfs/.github/workflows/zfs-tests-sanity.yml index f3fc607cb4fc..bab8aa175a85 100644 --- a/sys/contrib/openzfs/.github/workflows/zfs-tests-sanity.yml +++ b/sys/contrib/openzfs/.github/workflows/zfs-tests-sanity.yml @@ -11,9 +11,6 @@ jobs: - uses: actions/checkout@v3 with: ref: ${{ github.event.pull_request.head.sha }} - - name: Reclaim disk space - run: | - ${{ github.workspace }}/.github/workflows/scripts/reclaim_disk_space.sh - name: Install dependencies run: | sudo apt-get update @@ 
-28,15 +25,18 @@ jobs: ./configure --enable-debug --enable-debuginfo --enable-asan --enable-ubsan - name: Make run: | - make -j$(nproc) --no-print-directory --silent pkg-utils pkg-kmod + make --no-print-directory --silent native-deb-utils native-deb-kmod + mv ../*.deb . + rm ./openzfs-zfs-dkms*.deb ./openzfs-zfs-dracut*.deb - name: Install run: | - sudo dpkg -i *.deb # Update order of directories to search for modules, otherwise # Ubuntu will load kernel-shipped ones. sudo sed -i.bak 's/updates/extra updates/' /etc/depmod.d/ubuntu.conf - sudo depmod - sudo modprobe zfs + sudo dpkg -i *.deb + # Native Debian packages enable and start the services + # Stop zfs-zed daemon, as it may interfere with some ZTS test cases + sudo systemctl stop zfs-zed # Workaround for cloud-init bug # see https://github.com/openzfs/zfs/issues/12644 FILE=/lib/udev/rules.d/10-cloud-init-hook-hotplug.rules @@ -51,8 +51,9 @@ jobs: - name: Clear the kernel ring buffer run: | sudo dmesg -c >/var/tmp/dmesg-prerun - - name: Report disk space + - name: Reclaim and report disk space run: | + ${{ github.workspace }}/.github/workflows/scripts/reclaim_disk_space.sh df -h / - name: Tests run: | diff --git a/sys/contrib/openzfs/CODE_OF_CONDUCT.md b/sys/contrib/openzfs/CODE_OF_CONDUCT.md index 2dcc251e553d..51c9ef195430 100644 --- a/sys/contrib/openzfs/CODE_OF_CONDUCT.md +++ b/sys/contrib/openzfs/CODE_OF_CONDUCT.md @@ -1,2 +1,2 @@ -The [OpenZFS Code of Conduct](http://www.open-zfs.org/wiki/Code_of_Conduct) +The [OpenZFS Code of Conduct](https://openzfs.org/wiki/Code_of_Conduct) applies to spaces associated with the OpenZFS project, including GitHub. 
diff --git a/sys/contrib/openzfs/META b/sys/contrib/openzfs/META index 0b4d784bb77c..18b3f3498eeb 100644 --- a/sys/contrib/openzfs/META +++ b/sys/contrib/openzfs/META @@ -6,5 +6,5 @@ Release: 1 Release-Tags: relext License: CDDL Author: OpenZFS -Linux-Maximum: 6.0 +Linux-Maximum: 6.1 Linux-Minimum: 3.10 diff --git a/sys/contrib/openzfs/cmd/arc_summary b/sys/contrib/openzfs/cmd/arc_summary index 9d0c2d30ddd6..7149629468e3 100755 --- a/sys/contrib/openzfs/cmd/arc_summary +++ b/sys/contrib/openzfs/cmd/arc_summary @@ -558,8 +558,12 @@ def section_arc(kstats_dict): arc_target_size = arc_stats['c'] arc_max = arc_stats['c_max'] arc_min = arc_stats['c_min'] + anon_size = arc_stats['anon_size'] mfu_size = arc_stats['mfu_size'] mru_size = arc_stats['mru_size'] + mfug_size = arc_stats['mfu_ghost_size'] + mrug_size = arc_stats['mru_ghost_size'] + unc_size = arc_stats['uncached_size'] meta_limit = arc_stats['arc_meta_limit'] meta_size = arc_stats['arc_meta_used'] dnode_limit = arc_stats['arc_dnode_limit'] @@ -574,11 +578,17 @@ def section_arc(kstats_dict): f_perc(arc_min, arc_max), f_bytes(arc_min)) prt_i2('Max size (high water):', target_size_ratio, f_bytes(arc_max)) - caches_size = int(mfu_size)+int(mru_size) + caches_size = int(anon_size)+int(mfu_size)+int(mru_size)+int(unc_size) + prt_i2('Anonymous data size:', + f_perc(anon_size, caches_size), f_bytes(anon_size)) prt_i2('Most Frequently Used (MFU) cache size:', f_perc(mfu_size, caches_size), f_bytes(mfu_size)) prt_i2('Most Recently Used (MRU) cache size:', f_perc(mru_size, caches_size), f_bytes(mru_size)) + prt_i1('Most Frequently Used (MFU) ghost size:', f_bytes(mfug_size)) + prt_i1('Most Recently Used (MRU) ghost size:', f_bytes(mrug_size)) + prt_i2('Uncached data size:', + f_perc(unc_size, caches_size), f_bytes(unc_size)) prt_i2('Metadata cache size (hard limit):', f_perc(meta_limit, arc_max), f_bytes(meta_limit)) prt_i2('Metadata cache size (current):', @@ -626,78 +636,119 @@ def section_archits(kstats_dict): """
arc_stats = isolate_section('arcstats', kstats_dict) - all_accesses = int(arc_stats['hits'])+int(arc_stats['misses']) - actual_hits = int(arc_stats['mfu_hits'])+int(arc_stats['mru_hits']) - - prt_1('ARC total accesses (hits + misses):', f_hits(all_accesses)) - ta_todo = (('Cache hit ratio:', arc_stats['hits']), - ('Cache miss ratio:', arc_stats['misses']), - ('Actual hit ratio (MFU + MRU hits):', actual_hits)) + all_accesses = int(arc_stats['hits'])+int(arc_stats['iohits'])+\ + int(arc_stats['misses']) + prt_1('ARC total accesses:', f_hits(all_accesses)) + ta_todo = (('Total hits:', arc_stats['hits']), + ('Total I/O hits:', arc_stats['iohits']), + ('Total misses:', arc_stats['misses'])) for title, value in ta_todo: prt_i2(title, f_perc(value, all_accesses), f_hits(value)) + print() dd_total = int(arc_stats['demand_data_hits']) +\ + int(arc_stats['demand_data_iohits']) +\ int(arc_stats['demand_data_misses']) - prt_i2('Data demand efficiency:', - f_perc(arc_stats['demand_data_hits'], dd_total), - f_hits(dd_total)) - - dp_total = int(arc_stats['prefetch_data_hits']) +\ - int(arc_stats['prefetch_data_misses']) - prt_i2('Data prefetch efficiency:', - f_perc(arc_stats['prefetch_data_hits'], dp_total), - f_hits(dp_total)) - - known_hits = int(arc_stats['mfu_hits']) +\ - int(arc_stats['mru_hits']) +\ - int(arc_stats['mfu_ghost_hits']) +\ - int(arc_stats['mru_ghost_hits']) - - anon_hits = int(arc_stats['hits'])-known_hits - + prt_2('ARC demand data accesses:', f_perc(dd_total, all_accesses), + f_hits(dd_total)) + dd_todo = (('Demand data hits:', arc_stats['demand_data_hits']), + ('Demand data I/O hits:', arc_stats['demand_data_iohits']), + ('Demand data misses:', arc_stats['demand_data_misses'])) + for title, value in dd_todo: + prt_i2(title, f_perc(value, dd_total), f_hits(value)) print() - print('Cache hits by cache type:') + + dm_total = int(arc_stats['demand_metadata_hits']) +\ + int(arc_stats['demand_metadata_iohits']) +\ + int(arc_stats['demand_metadata_misses']) + 
prt_2('ARC demand metadata accesses:', f_perc(dm_total, all_accesses), + f_hits(dm_total)) + dm_todo = (('Demand metadata hits:', arc_stats['demand_metadata_hits']), + ('Demand metadata I/O hits:', + arc_stats['demand_metadata_iohits']), + ('Demand metadata misses:', arc_stats['demand_metadata_misses'])) + for title, value in dm_todo: + prt_i2(title, f_perc(value, dm_total), f_hits(value)) + print() + + pd_total = int(arc_stats['prefetch_data_hits']) +\ + int(arc_stats['prefetch_data_iohits']) +\ + int(arc_stats['prefetch_data_misses']) + prt_2('ARC prefetch data accesses:', f_perc(pd_total, all_accesses), + f_hits(pd_total)) + pd_todo = (('Prefetch data hits:', arc_stats['prefetch_data_hits']), + ('Prefetch data I/O hits:', arc_stats['prefetch_data_iohits']), + ('Prefetch data misses:', arc_stats['prefetch_data_misses'])) + for title, value in pd_todo: + prt_i2(title, f_perc(value, pd_total), f_hits(value)) + print() + + pm_total = int(arc_stats['prefetch_metadata_hits']) +\ + int(arc_stats['prefetch_metadata_iohits']) +\ + int(arc_stats['prefetch_metadata_misses']) + prt_2('ARC prefetch metadata accesses:', f_perc(pm_total, all_accesses), + f_hits(pm_total)) + pm_todo = (('Prefetch metadata hits:', + arc_stats['prefetch_metadata_hits']), + ('Prefetch metadata I/O hits:', + arc_stats['prefetch_metadata_iohits']), + ('Prefetch metadata misses:', + arc_stats['prefetch_metadata_misses'])) + for title, value in pm_todo: + prt_i2(title, f_perc(value, pm_total), f_hits(value)) + print() + + all_prefetches = int(arc_stats['predictive_prefetch'])+\ + int(arc_stats['prescient_prefetch']) + prt_2('ARC predictive prefetches:', + f_perc(arc_stats['predictive_prefetch'], all_prefetches), + f_hits(arc_stats['predictive_prefetch'])) + prt_i2('Demand hits after predictive:', + f_perc(arc_stats['demand_hit_predictive_prefetch'], + arc_stats['predictive_prefetch']), + f_hits(arc_stats['demand_hit_predictive_prefetch'])) + prt_i2('Demand I/O hits after predictive:', +
f_perc(arc_stats['demand_iohit_predictive_prefetch'], + arc_stats['predictive_prefetch']), + f_hits(arc_stats['demand_iohit_predictive_prefetch'])) + never = int(arc_stats['predictive_prefetch']) -\ + int(arc_stats['demand_hit_predictive_prefetch']) -\ + int(arc_stats['demand_iohit_predictive_prefetch']) + prt_i2('Never demanded after predictive:', + f_perc(never, arc_stats['predictive_prefetch']), + f_hits(never)) + print() + + prt_2('ARC prescient prefetches:', + f_perc(arc_stats['prescient_prefetch'], all_prefetches), + f_hits(arc_stats['prescient_prefetch'])) + prt_i2('Demand hits after prescient:', + f_perc(arc_stats['demand_hit_prescient_prefetch'], + arc_stats['prescient_prefetch']), + f_hits(arc_stats['demand_hit_prescient_prefetch'])) + prt_i2('Demand I/O hits after prescient:', + f_perc(arc_stats['demand_iohit_prescient_prefetch'], + arc_stats['prescient_prefetch']), + f_hits(arc_stats['demand_iohit_prescient_prefetch'])) + never = int(arc_stats['prescient_prefetch'])-\ + int(arc_stats['demand_hit_prescient_prefetch'])-\ + int(arc_stats['demand_iohit_prescient_prefetch']) + prt_i2('Never demanded after prescient:', + f_perc(never, arc_stats['prescient_prefetch']), + f_hits(never)) + print() + + print('ARC states hits of all accesses:') cl_todo = (('Most frequently used (MFU):', arc_stats['mfu_hits']), ('Most recently used (MRU):', arc_stats['mru_hits']), ('Most frequently used (MFU) ghost:', arc_stats['mfu_ghost_hits']), ('Most recently used (MRU) ghost:', - arc_stats['mru_ghost_hits'])) - + arc_stats['mru_ghost_hits']), + ('Uncached:', arc_stats['uncached_hits'])) for title, value in cl_todo: - prt_i2(title, f_perc(value, arc_stats['hits']), f_hits(value)) - - # For some reason, anon_hits can turn negative, which is weird. Until we - # have figured out why this happens, we just hide the problem, following - # the behavior of the original arc_summary. 
- if anon_hits >= 0: - prt_i2('Anonymously used:', - f_perc(anon_hits, arc_stats['hits']), f_hits(anon_hits)) - - print() - print('Cache hits by data type:') - dt_todo = (('Demand data:', arc_stats['demand_data_hits']), - ('Prefetch data:', arc_stats['prefetch_data_hits']), - ('Demand metadata:', arc_stats['demand_metadata_hits']), - ('Prefetch metadata:', - arc_stats['prefetch_metadata_hits'])) - - for title, value in dt_todo: - prt_i2(title, f_perc(value, arc_stats['hits']), f_hits(value)) - - print() - print('Cache misses by data type:') - dm_todo = (('Demand data:', arc_stats['demand_data_misses']), - ('Prefetch data:', - arc_stats['prefetch_data_misses']), - ('Demand metadata:', arc_stats['demand_metadata_misses']), - ('Prefetch metadata:', - arc_stats['prefetch_metadata_misses'])) - - for title, value in dm_todo: - prt_i2(title, f_perc(value, arc_stats['misses']), f_hits(value)) - + prt_i2(title, f_perc(value, all_accesses), f_hits(value)) print() @@ -708,11 +759,17 @@ def section_dmu(kstats_dict): zfetch_access_total = int(zfetch_stats['hits'])+int(zfetch_stats['misses']) - prt_1('DMU prefetch efficiency:', f_hits(zfetch_access_total)) - prt_i2('Hit ratio:', f_perc(zfetch_stats['hits'], zfetch_access_total), + prt_1('DMU predictive prefetcher calls:', f_hits(zfetch_access_total)) + prt_i2('Stream hits:', + f_perc(zfetch_stats['hits'], zfetch_access_total), f_hits(zfetch_stats['hits'])) - prt_i2('Miss ratio:', f_perc(zfetch_stats['misses'], zfetch_access_total), + prt_i2('Stream misses:', + f_perc(zfetch_stats['misses'], zfetch_access_total), f_hits(zfetch_stats['misses'])) + prt_i2('Streams limit reached:', + f_perc(zfetch_stats['max_streams'], zfetch_stats['misses']), + f_hits(zfetch_stats['max_streams'])) + prt_i1('Prefetches issued', f_hits(zfetch_stats['io_issued'])) print() diff --git a/sys/contrib/openzfs/cmd/arcstat.in b/sys/contrib/openzfs/cmd/arcstat.in index 628a6bcef772..8df1c62f7e86 100755 --- a/sys/contrib/openzfs/cmd/arcstat.in +++ 
b/sys/contrib/openzfs/cmd/arcstat.in @@ -62,31 +62,64 @@ from signal import signal, SIGINT, SIGWINCH, SIG_DFL cols = { # HDR: [Size, Scale, Description] "time": [8, -1, "Time"], - "hits": [4, 1000, "ARC reads per second"], + "hits": [4, 1000, "ARC hits per second"], + "iohs": [4, 1000, "ARC I/O hits per second"], "miss": [4, 1000, "ARC misses per second"], "read": [4, 1000, "Total ARC accesses per second"], "hit%": [4, 100, "ARC hit percentage"], + "ioh%": [4, 100, "ARC I/O hit percentage"], "miss%": [5, 100, "ARC miss percentage"], "dhit": [4, 1000, "Demand hits per second"], + "dioh": [4, 1000, "Demand I/O hits per second"], "dmis": [4, 1000, "Demand misses per second"], "dh%": [3, 100, "Demand hit percentage"], + "di%": [3, 100, "Demand I/O hit percentage"], "dm%": [3, 100, "Demand miss percentage"], + "ddhit": [5, 1000, "Demand data hits per second"], + "ddioh": [5, 1000, "Demand data I/O hits per second"], + "ddmis": [5, 1000, "Demand data misses per second"], + "ddh%": [4, 100, "Demand data hit percentage"], + "ddi%": [4, 100, "Demand data I/O hit percentage"], + "ddm%": [4, 100, "Demand data miss percentage"], + "dmhit": [5, 1000, "Demand metadata hits per second"], + "dmioh": [5, 1000, "Demand metadata I/O hits per second"], + "dmmis": [5, 1000, "Demand metadata misses per second"], + "dmh%": [4, 100, "Demand metadata hit percentage"], + "dmi%": [4, 100, "Demand metadata I/O hit percentage"], + "dmm%": [4, 100, "Demand metadata miss percentage"], "phit": [4, 1000, "Prefetch hits per second"], + "pioh": [4, 1000, "Prefetch I/O hits per second"], "pmis": [4, 1000, "Prefetch misses per second"], "ph%": [3, 100, "Prefetch hits percentage"], + "pi%": [3, 100, "Prefetch I/O hits percentage"], "pm%": [3, 100, "Prefetch miss percentage"], + "pdhit": [5, 1000, "Prefetch data hits per second"], + "pdioh": [5, 1000, "Prefetch data I/O hits per second"], + "pdmis": [5, 1000, "Prefetch data misses per second"], + "pdh%": [4, 100, "Prefetch data hits percentage"], + 
"pdi%": [4, 100, "Prefetch data I/O hits percentage"], + "pdm%": [4, 100, "Prefetch data miss percentage"], + "pmhit": [5, 1000, "Prefetch metadata hits per second"], + "pmioh": [5, 1000, "Prefetch metadata I/O hits per second"], + "pmmis": [5, 1000, "Prefetch metadata misses per second"], + "pmh%": [4, 100, "Prefetch metadata hits percentage"], + "pmi%": [4, 100, "Prefetch metadata I/O hits percentage"], + "pmm%": [4, 100, "Prefetch metadata miss percentage"], "mhit": [4, 1000, "Metadata hits per second"], + "mioh": [4, 1000, "Metadata I/O hits per second"], "mmis": [4, 1000, "Metadata misses per second"], "mread": [5, 1000, "Metadata accesses per second"], "mh%": [3, 100, "Metadata hit percentage"], + "mi%": [3, 100, "Metadata I/O hit percentage"], "mm%": [3, 100, "Metadata miss percentage"], "arcsz": [5, 1024, "ARC size"], - "size": [4, 1024, "ARC size"], - "c": [4, 1024, "ARC target size"], + "size": [5, 1024, "ARC size"], + "c": [5, 1024, "ARC target size"], "mfu": [4, 1000, "MFU list hits per second"], "mru": [4, 1000, "MRU list hits per second"], "mfug": [4, 1000, "MFU ghost list hits per second"], "mrug": [4, 1000, "MRU ghost list hits per second"], + "unc": [4, 1000, "Uncached list hits per second"], "eskip": [5, 1000, "evict_skip per second"], "el2skip": [7, 1000, "evict skip, due to l2 writes, per second"], "el2cach": [7, 1024, "Size of L2 cached evictions per second"], @@ -96,7 +129,11 @@ cols = { "el2inel": [7, 1024, "Size of L2 ineligible evictions per second"], "mtxmis": [6, 1000, "mutex_miss per second"], "dread": [5, 1000, "Demand accesses per second"], + "ddread": [6, 1000, "Demand data accesses per second"], + "dmread": [6, 1000, "Demand metadata accesses per second"], "pread": [5, 1000, "Prefetch accesses per second"], + "pdread": [6, 1000, "Prefetch data accesses per second"], + "pmread": [6, 1000, "Prefetch metadata accesses per second"], "l2hits": [6, 1000, "L2ARC hits per second"], "l2miss": [6, 1000, "L2ARC misses per second"], "l2read": 
[6, 1000, "Total L2ARC accesses per second"], @@ -116,23 +153,22 @@ cols = { "l2size": [6, 1024, "Size of the L2ARC"], "l2bytes": [7, 1024, "Bytes read per second from the L2ARC"], "grow": [4, 1000, "ARC grow disabled"], - "need": [4, 1024, "ARC reclaim need"], - "free": [4, 1024, "ARC free memory"], + "need": [5, 1024, "ARC reclaim need"], + "free": [5, 1024, "ARC free memory"], "avail": [5, 1024, "ARC available memory"], "waste": [5, 1024, "Wasted memory due to round up to pagesize"], } v = {} -hdr = ["time", "read", "miss", "miss%", "dmis", "dm%", "pmis", "pm%", "mmis", - "mm%", "size", "c", "avail"] -xhdr = ["time", "mfu", "mru", "mfug", "mrug", "eskip", "mtxmis", "dread", - "pread", "read"] +hdr = ["time", "read", "ddread", "ddh%", "dmread", "dmh%", "pread", "ph%", + "size", "c", "avail"] +xhdr = ["time", "mfu", "mru", "mfug", "mrug", "unc", "eskip", "mtxmis", + "dread", "pread", "read"] sint = 1 # Default interval is 1 second count = 1 # Default count is 1 hdr_intr = 20 # Print header every 20 lines of output opfile = None sep = " " # Default separator is 2 spaces -version = "0.4" l2exist = False cmd = ("Usage: arcstat [-havxp] [-f fields] [-o file] [-s string] [interval " "[count]]\n") @@ -442,34 +478,80 @@ def calculate(): v = dict() v["time"] = time.strftime("%H:%M:%S", time.localtime()) v["hits"] = d["hits"] // sint + v["iohs"] = d["iohits"] // sint v["miss"] = d["misses"] // sint - v["read"] = v["hits"] + v["miss"] + v["read"] = v["hits"] + v["iohs"] + v["miss"] v["hit%"] = 100 * v["hits"] // v["read"] if v["read"] > 0 else 0 - v["miss%"] = 100 - v["hit%"] if v["read"] > 0 else 0 + v["ioh%"] = 100 * v["iohs"] // v["read"] if v["read"] > 0 else 0 + v["miss%"] = 100 - v["hit%"] - v["ioh%"] if v["read"] > 0 else 0 v["dhit"] = (d["demand_data_hits"] + d["demand_metadata_hits"]) // sint + v["dioh"] = (d["demand_data_iohits"] + d["demand_metadata_iohits"]) // sint v["dmis"] = (d["demand_data_misses"] + d["demand_metadata_misses"]) // sint - v["dread"] = 
v["dhit"] + v["dmis"] + v["dread"] = v["dhit"] + v["dioh"] + v["dmis"] v["dh%"] = 100 * v["dhit"] // v["dread"] if v["dread"] > 0 else 0 - v["dm%"] = 100 - v["dh%"] if v["dread"] > 0 else 0 + v["di%"] = 100 * v["dioh"] // v["dread"] if v["dread"] > 0 else 0 + v["dm%"] = 100 - v["dh%"] - v["di%"] if v["dread"] > 0 else 0 + + v["ddhit"] = d["demand_data_hits"] // sint + v["ddioh"] = d["demand_data_iohits"] // sint + v["ddmis"] = d["demand_data_misses"] // sint + + v["ddread"] = v["ddhit"] + v["ddioh"] + v["ddmis"] + v["ddh%"] = 100 * v["ddhit"] // v["ddread"] if v["ddread"] > 0 else 0 + v["ddi%"] = 100 * v["ddioh"] // v["ddread"] if v["ddread"] > 0 else 0 + v["ddm%"] = 100 - v["ddh%"] - v["ddi%"] if v["ddread"] > 0 else 0 + + v["dmhit"] = d["demand_metadata_hits"] // sint + v["dmioh"] = d["demand_metadata_iohits"] // sint + v["dmmis"] = d["demand_metadata_misses"] // sint + + v["dmread"] = v["dmhit"] + v["dmioh"] + v["dmmis"] + v["dmh%"] = 100 * v["dmhit"] // v["dmread"] if v["dmread"] > 0 else 0 + v["dmi%"] = 100 * v["dmioh"] // v["dmread"] if v["dmread"] > 0 else 0 + v["dmm%"] = 100 - v["dmh%"] - v["dmi%"] if v["dmread"] > 0 else 0 v["phit"] = (d["prefetch_data_hits"] + d["prefetch_metadata_hits"]) // sint + v["pioh"] = (d["prefetch_data_iohits"] + + d["prefetch_metadata_iohits"]) // sint v["pmis"] = (d["prefetch_data_misses"] + d["prefetch_metadata_misses"]) // sint - v["pread"] = v["phit"] + v["pmis"] + v["pread"] = v["phit"] + v["pioh"] + v["pmis"] v["ph%"] = 100 * v["phit"] // v["pread"] if v["pread"] > 0 else 0 - v["pm%"] = 100 - v["ph%"] if v["pread"] > 0 else 0 + v["pi%"] = 100 * v["pioh"] // v["pread"] if v["pread"] > 0 else 0 + v["pm%"] = 100 - v["ph%"] - v["pi%"] if v["pread"] > 0 else 0 + + v["pdhit"] = d["prefetch_data_hits"] // sint + v["pdioh"] = d["prefetch_data_iohits"] // sint + v["pdmis"] = d["prefetch_data_misses"] // sint + + v["pdread"] = v["pdhit"] + v["pdioh"] + v["pdmis"] + v["pdh%"] = 100 * v["pdhit"] // v["pdread"] if v["pdread"] > 0 else 
0 + v["pdi%"] = 100 * v["pdioh"] // v["pdread"] if v["pdread"] > 0 else 0 + v["pdm%"] = 100 - v["pdh%"] - v["pdi%"] if v["pdread"] > 0 else 0 + + v["pmhit"] = d["prefetch_metadata_hits"] // sint + v["pmioh"] = d["prefetch_metadata_iohits"] // sint + v["pmmis"] = d["prefetch_metadata_misses"] // sint + + v["pmread"] = v["pmhit"] + v["pmioh"] + v["pmmis"] + v["pmh%"] = 100 * v["pmhit"] // v["pmread"] if v["pmread"] > 0 else 0 + v["pmi%"] = 100 * v["pmioh"] // v["pmread"] if v["pmread"] > 0 else 0 + v["pmm%"] = 100 - v["pmh%"] - v["pmi%"] if v["pmread"] > 0 else 0 v["mhit"] = (d["prefetch_metadata_hits"] + d["demand_metadata_hits"]) // sint + v["mioh"] = (d["prefetch_metadata_iohits"] + + d["demand_metadata_iohits"]) // sint v["mmis"] = (d["prefetch_metadata_misses"] + d["demand_metadata_misses"]) // sint - v["mread"] = v["mhit"] + v["mmis"] + v["mread"] = v["mhit"] + v["mioh"] + v["mmis"] v["mh%"] = 100 * v["mhit"] // v["mread"] if v["mread"] > 0 else 0 - v["mm%"] = 100 - v["mh%"] if v["mread"] > 0 else 0 + v["mi%"] = 100 * v["mioh"] // v["mread"] if v["mread"] > 0 else 0 + v["mm%"] = 100 - v["mh%"] - v["mi%"] if v["mread"] > 0 else 0 v["arcsz"] = cur["size"] v["size"] = cur["size"] @@ -478,6 +560,7 @@ def calculate(): v["mru"] = d["mru_hits"] // sint v["mrug"] = d["mru_ghost_hits"] // sint v["mfug"] = d["mfu_ghost_hits"] // sint + v["unc"] = d["uncached_hits"] // sint v["eskip"] = d["evict_skip"] // sint v["el2skip"] = d["evict_l2_skip"] // sint v["el2cach"] = d["evict_l2_cached"] // sint diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c index 8b5e5a4ed932..b04b220c768e 100644 --- a/sys/contrib/openzfs/cmd/zdb/zdb.c +++ b/sys/contrib/openzfs/cmd/zdb/zdb.c @@ -3496,9 +3496,9 @@ dump_object(objset_t *os, uint64_t object, int verbosity, zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize, sizeof (asize)); zdb_nicenum(doi.doi_bonus_size, bonus_size, sizeof (bonus_size)); zdb_nicenum(doi.doi_dnodesize, dnsize, sizeof (dnsize)); - (void) 
sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count * - doi.doi_data_block_size / (object == 0 ? DNODES_PER_BLOCK : 1) / - doi.doi_max_offset); + (void) snprintf(fill, sizeof (fill), "%6.2f", 100.0 * + doi.doi_fill_count * doi.doi_data_block_size / (object == 0 ? + DNODES_PER_BLOCK : 1) / doi.doi_max_offset); aux[0] = '\0'; @@ -4341,26 +4341,26 @@ dump_l2arc_log_entries(uint64_t log_entries, } static void -dump_l2arc_log_blkptr(l2arc_log_blkptr_t lbps) +dump_l2arc_log_blkptr(const l2arc_log_blkptr_t *lbps) { - (void) printf("|\t\tdaddr: %llu\n", (u_longlong_t)lbps.lbp_daddr); + (void) printf("|\t\tdaddr: %llu\n", (u_longlong_t)lbps->lbp_daddr); (void) printf("|\t\tpayload_asize: %llu\n", - (u_longlong_t)lbps.lbp_payload_asize); + (u_longlong_t)lbps->lbp_payload_asize); (void) printf("|\t\tpayload_start: %llu\n", - (u_longlong_t)lbps.lbp_payload_start); + (u_longlong_t)lbps->lbp_payload_start); (void) printf("|\t\tlsize: %llu\n", - (u_longlong_t)L2BLK_GET_LSIZE((&lbps)->lbp_prop)); + (u_longlong_t)L2BLK_GET_LSIZE(lbps->lbp_prop)); (void) printf("|\t\tasize: %llu\n", - (u_longlong_t)L2BLK_GET_PSIZE((&lbps)->lbp_prop)); + (u_longlong_t)L2BLK_GET_PSIZE(lbps->lbp_prop)); (void) printf("|\t\tcompralgo: %llu\n", - (u_longlong_t)L2BLK_GET_COMPRESS((&lbps)->lbp_prop)); + (u_longlong_t)L2BLK_GET_COMPRESS(lbps->lbp_prop)); (void) printf("|\t\tcksumalgo: %llu\n", - (u_longlong_t)L2BLK_GET_CHECKSUM((&lbps)->lbp_prop)); + (u_longlong_t)L2BLK_GET_CHECKSUM(lbps->lbp_prop)); (void) printf("|\n\n"); } static void -dump_l2arc_log_blocks(int fd, l2arc_dev_hdr_phys_t l2dhdr, +dump_l2arc_log_blocks(int fd, const l2arc_dev_hdr_phys_t *l2dhdr, l2arc_dev_hdr_phys_t *rebuild) { l2arc_log_blk_phys_t this_lb; @@ -4373,13 +4373,13 @@ dump_l2arc_log_blocks(int fd, l2arc_dev_hdr_phys_t l2dhdr, if (!dump_opt['q']) print_l2arc_log_blocks(); - memcpy(lbps, l2dhdr.dh_start_lbps, sizeof (lbps)); + memcpy(lbps, l2dhdr->dh_start_lbps, sizeof (lbps)); - dev.l2ad_evict = l2dhdr.dh_evict; - dev.l2ad_start = 
l2dhdr.dh_start; - dev.l2ad_end = l2dhdr.dh_end; + dev.l2ad_evict = l2dhdr->dh_evict; + dev.l2ad_start = l2dhdr->dh_start; + dev.l2ad_end = l2dhdr->dh_end; - if (l2dhdr.dh_start_lbps[0].lbp_daddr == 0) { + if (l2dhdr->dh_start_lbps[0].lbp_daddr == 0) { /* no log blocks to read */ if (!dump_opt['q']) { (void) printf("No log blocks to read\n"); @@ -4391,7 +4391,7 @@ dump_l2arc_log_blocks(int fd, l2arc_dev_hdr_phys_t l2dhdr, L2BLK_GET_PSIZE((&lbps[0])->lbp_prop); } - dev.l2ad_first = !!(l2dhdr.dh_flags & L2ARC_DEV_HDR_EVICT_FIRST); + dev.l2ad_first = !!(l2dhdr->dh_flags & L2ARC_DEV_HDR_EVICT_FIRST); for (;;) { if (!l2arc_log_blkptr_valid(&dev, &lbps[0])) @@ -4412,7 +4412,7 @@ dump_l2arc_log_blocks(int fd, l2arc_dev_hdr_phys_t l2dhdr, failed++; if (!dump_opt['q']) { (void) printf("Invalid cksum\n"); - dump_l2arc_log_blkptr(lbps[0]); + dump_l2arc_log_blkptr(&lbps[0]); } break; } @@ -4449,11 +4449,11 @@ dump_l2arc_log_blocks(int fd, l2arc_dev_hdr_phys_t l2dhdr, (void) printf("lb[%4llu]\tmagic: %llu\n", (u_longlong_t)rebuild->dh_lb_count, (u_longlong_t)this_lb.lb_magic); - dump_l2arc_log_blkptr(lbps[0]); + dump_l2arc_log_blkptr(&lbps[0]); } if (dump_opt['l'] > 2 && !dump_opt['q']) - dump_l2arc_log_entries(l2dhdr.dh_log_entries, + dump_l2arc_log_entries(l2dhdr->dh_log_entries, this_lb.lb_entries, rebuild->dh_lb_count); @@ -4531,7 +4531,7 @@ dump_l2arc_header(int fd) (u_longlong_t)l2dhdr.dh_trim_state); } - dump_l2arc_log_blocks(fd, l2dhdr, &rebuild); + dump_l2arc_log_blocks(fd, &l2dhdr, &rebuild); /* * The total aligned size of log blocks and the number of log blocks * reported in the header of the device may be less than what zdb diff --git a/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c b/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c index fb07266dae21..587051c25bfd 100644 --- a/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c +++ b/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c @@ -170,7 +170,7 @@ zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg) } 
zpool_close(zhp); - return (gsp->gs_vdev_guid != 0); + return (gsp->gs_devid != NULL && gsp->gs_vdev_guid != 0); } void diff --git a/sys/contrib/openzfs/cmd/zed/agents/zfs_diagnosis.c b/sys/contrib/openzfs/cmd/zed/agents/zfs_diagnosis.c index 0250682f9d46..685f71bb923b 100644 --- a/sys/contrib/openzfs/cmd/zed/agents/zfs_diagnosis.c +++ b/sys/contrib/openzfs/cmd/zed/agents/zfs_diagnosis.c @@ -39,6 +39,15 @@ #include "zfs_agents.h" #include "fmd_api.h" +/* + * Default values for the serd engine when processing checksum or io errors. The + * semantics are N <events> in T <seconds>. + */ +#define DEFAULT_CHECKSUM_N 10 /* events */ +#define DEFAULT_CHECKSUM_T 600 /* seconds */ +#define DEFAULT_IO_N 10 /* events */ +#define DEFAULT_IO_T 600 /* seconds */ + /* * Our serd engines are named 'zfs_<pool_guid>_<vdev_guid>_{checksum,io}'. This * #define reserves enough space for two 64-bit hex values plus the length of @@ -448,6 +457,8 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) zfs_case_t *zcp, *dcp; int32_t pool_state; uint64_t ena, pool_guid, vdev_guid; + uint64_t checksum_n, checksum_t; + uint64_t io_n, io_t; er_timeval_t pool_load; er_timeval_t er_when; nvlist_t *detector; @@ -784,11 +795,21 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) if (fmd_nvl_class_match(hdl, nvl, ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_IO))) { if (zcp->zc_data.zc_serd_io[0] == '\0') { + if (nvlist_lookup_uint64(nvl, + FM_EREPORT_PAYLOAD_ZFS_VDEV_IO_N, + &io_n) != 0) { + io_n = DEFAULT_IO_N; + } + if (nvlist_lookup_uint64(nvl, + FM_EREPORT_PAYLOAD_ZFS_VDEV_IO_T, + &io_t) != 0) { + io_t = DEFAULT_IO_T; + } zfs_serd_name(zcp->zc_data.zc_serd_io, pool_guid, vdev_guid, "io"); fmd_serd_create(hdl, zcp->zc_data.zc_serd_io, - fmd_prop_get_int32(hdl, "io_N"), - fmd_prop_get_int64(hdl, "io_T")); + io_n, + SEC2NSEC(io_t)); zfs_case_serialize(zcp); } if (fmd_serd_record(hdl, zcp->zc_data.zc_serd_io, ep)) @@ -813,12 +834,23 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
const char *class) } if (zcp->zc_data.zc_serd_checksum[0] == '\0') { + if (nvlist_lookup_uint64(nvl, + FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_N, + &checksum_n) != 0) { + checksum_n = DEFAULT_CHECKSUM_N; + } + if (nvlist_lookup_uint64(nvl, + FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_T, + &checksum_t) != 0) { + checksum_t = DEFAULT_CHECKSUM_T; + } + zfs_serd_name(zcp->zc_data.zc_serd_checksum, pool_guid, vdev_guid, "checksum"); fmd_serd_create(hdl, zcp->zc_data.zc_serd_checksum, - fmd_prop_get_int32(hdl, "checksum_N"), - fmd_prop_get_int64(hdl, "checksum_T")); + checksum_n, + SEC2NSEC(checksum_t)); zfs_case_serialize(zcp); } if (fmd_serd_record(hdl, diff --git a/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c b/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c index 4fc8ceb9fb44..e73fe25e56dd 100644 --- a/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c +++ b/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c @@ -190,10 +190,12 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled) uint64_t wholedisk = 0ULL; uint64_t offline = 0ULL, faulted = 0ULL; uint64_t guid = 0ULL; + uint64_t is_spare = 0; char *physpath = NULL, *new_devid = NULL, *enc_sysfs_path = NULL; char rawpath[PATH_MAX], fullpath[PATH_MAX]; char devpath[PATH_MAX]; int ret; + int online_flag = ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE; boolean_t is_sd = B_FALSE; boolean_t is_mpath_wholedisk = B_FALSE; uint_t c; @@ -219,6 +221,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled) (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_FAULTED, &faulted); (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &guid); + (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_IS_SPARE, &is_spare); /* * Special case: @@ -309,11 +312,13 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled) } } + if (is_spare) + online_flag |= ZFS_ONLINE_SPARE; + /* * Attempt to online the device. 
*/ - if (zpool_vdev_online(zhp, fullpath, - ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, &newstate) == 0 && + if (zpool_vdev_online(zhp, fullpath, online_flag, &newstate) == 0 && (newstate == VDEV_STATE_HEALTHY || newstate == VDEV_STATE_DEGRADED)) { zed_log_msg(LOG_INFO, @@ -537,6 +542,7 @@ typedef struct dev_data { uint64_t dd_vdev_guid; uint64_t dd_new_vdev_guid; const char *dd_new_devid; + uint64_t dd_num_spares; } dev_data_t; static void @@ -547,6 +553,7 @@ zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data) uint_t c, children; nvlist_t **child; uint64_t guid = 0; + uint64_t isspare = 0; /* * First iterate over any children. @@ -572,7 +579,7 @@ zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data) } /* once a vdev was matched and processed there is nothing left to do */ - if (dp->dd_found) + if (dp->dd_found && dp->dd_num_spares == 0) return; (void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID, &guid); @@ -622,6 +629,10 @@ zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data) } } + if (dp->dd_found == B_TRUE && nvlist_lookup_uint64(nvl, + ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare) + dp->dd_num_spares++; + (dp->dd_func)(zhp, nvl, dp->dd_islabeled); } @@ -682,7 +693,9 @@ zfs_iter_pool(zpool_handle_t *zhp, void *data) } zpool_close(zhp); - return (dp->dd_found); /* cease iteration after a match */ + + /* cease iteration after a match */ + return (dp->dd_found && dp->dd_num_spares == 0); } /* diff --git a/sys/contrib/openzfs/cmd/zed/agents/zfs_retire.c b/sys/contrib/openzfs/cmd/zed/agents/zfs_retire.c index f4b6dff48176..45a45c497a0a 100644 --- a/sys/contrib/openzfs/cmd/zed/agents/zfs_retire.c +++ b/sys/contrib/openzfs/cmd/zed/agents/zfs_retire.c @@ -76,6 +76,8 @@ typedef struct find_cbdata { uint64_t cb_guid; zpool_handle_t *cb_zhp; nvlist_t *cb_vdev; + uint64_t cb_vdev_guid; + uint64_t cb_num_spares; } find_cbdata_t; static int @@ -141,6 +143,64 @@ find_vdev(libzfs_handle_t *zhdl, nvlist_t *nv, uint64_t search_guid) return (NULL); 
} +static int +remove_spares(zpool_handle_t *zhp, void *data) +{ + nvlist_t *config, *nvroot; + nvlist_t **spares; + uint_t nspares; + char *devname; + find_cbdata_t *cbp = data; + uint64_t spareguid = 0; + vdev_stat_t *vs; + unsigned int c; + + config = zpool_get_config(zhp, NULL); + if (nvlist_lookup_nvlist(config, + ZPOOL_CONFIG_VDEV_TREE, &nvroot) != 0) { + zpool_close(zhp); + return (0); + } + + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, + &spares, &nspares) != 0) { + zpool_close(zhp); + return (0); + } + + for (int i = 0; i < nspares; i++) { + if (nvlist_lookup_uint64(spares[i], ZPOOL_CONFIG_GUID, + &spareguid) == 0 && spareguid == cbp->cb_vdev_guid) { + devname = zpool_vdev_name(NULL, zhp, spares[i], + B_FALSE); + nvlist_lookup_uint64_array(spares[i], + ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &c); + if (vs->vs_state != VDEV_STATE_REMOVED && + zpool_vdev_remove_wanted(zhp, devname) == 0) + cbp->cb_num_spares++; + break; + } + } + + zpool_close(zhp); + return (0); +} + +/* + * Given a vdev guid, find and remove all spares associated with it. + */ +static int +find_and_remove_spares(libzfs_handle_t *zhdl, uint64_t vdev_guid) +{ + find_cbdata_t cb; + + cb.cb_num_spares = 0; + cb.cb_vdev_guid = vdev_guid; + zpool_iter(zhdl, remove_spares, &cb); + + return (cb.cb_num_spares); +} + /* * Given a (pool, vdev) GUID pair, find the matching pool and vdev. 
*/ @@ -315,6 +375,8 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, libzfs_handle_t *zhdl = zdp->zrd_hdl; boolean_t fault_device, degrade_device; boolean_t is_repair; + boolean_t l2arc = B_FALSE; + boolean_t spare = B_FALSE; char *scheme; nvlist_t *vdev = NULL; char *uuid; @@ -323,7 +385,6 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, boolean_t is_disk; vdev_aux_t aux; uint64_t state = 0; - int l2arc; vdev_stat_t *vs; unsigned int c; @@ -343,10 +404,26 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, char *devtype; char *devname; + if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, + &devtype) == 0) { + if (strcmp(devtype, VDEV_TYPE_SPARE) == 0) + spare = B_TRUE; + else if (strcmp(devtype, VDEV_TYPE_L2CACHE) == 0) + l2arc = B_TRUE; + } + + if (nvlist_lookup_uint64(nvl, + FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, &vdev_guid) != 0) + return; + + if (spare) { + int nspares = find_and_remove_spares(zhdl, vdev_guid); + fmd_hdl_debug(hdl, "%d spares removed", nspares); + return; + } + if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, - &pool_guid) != 0 || - nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, - &vdev_guid) != 0) + &pool_guid) != 0) return; if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid, @@ -367,10 +444,6 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, state == VDEV_STATE_REMOVED) return; - l2arc = (nvlist_lookup_string(nvl, - FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, &devtype) == 0 && - strcmp(devtype, VDEV_TYPE_L2CACHE) == 0); - /* Remove the vdev since device is unplugged */ if (l2arc || (strcmp(class, "resource.fs.zfs.removed") == 0)) { int status = zpool_vdev_remove_wanted(zhp, devname); diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/statechange-notify.sh b/sys/contrib/openzfs/cmd/zed/zed.d/statechange-notify.sh index c475fdb36660..ae610df20e45 100755 --- a/sys/contrib/openzfs/cmd/zed/zed.d/statechange-notify.sh +++ 
b/sys/contrib/openzfs/cmd/zed/zed.d/statechange-notify.sh @@ -38,7 +38,7 @@ if [ "${ZEVENT_VDEV_STATE_STR}" != "FAULTED" ] \ fi umask 077 -note_subject="ZFS device fault for pool ${ZEVENT_POOL_GUID} on $(hostname)" +note_subject="ZFS device fault for pool ${ZEVENT_POOL} on $(hostname)" note_pathname="$(mktemp)" { if [ "${ZEVENT_VDEV_STATE_STR}" = "FAULTED" ] ; then @@ -66,7 +66,7 @@ note_pathname="$(mktemp)" [ -n "${ZEVENT_VDEV_GUID}" ] && echo " vguid: ${ZEVENT_VDEV_GUID}" [ -n "${ZEVENT_VDEV_DEVID}" ] && echo " devid: ${ZEVENT_VDEV_DEVID}" - echo " pool: ${ZEVENT_POOL_GUID}" + echo " pool: ${ZEVENT_POOL} (${ZEVENT_POOL_GUID})" } > "${note_pathname}" diff --git a/sys/contrib/openzfs/cmd/zfs/zfs_iter.c b/sys/contrib/openzfs/cmd/zfs/zfs_iter.c index a0a80d481648..0f8ddd93aad7 100644 --- a/sys/contrib/openzfs/cmd/zfs/zfs_iter.c +++ b/sys/contrib/openzfs/cmd/zfs/zfs_iter.c @@ -143,19 +143,20 @@ zfs_callback(zfs_handle_t *zhp, void *data) (cb->cb_types & (ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME))) && zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) { - (void) zfs_iter_filesystems(zhp, zfs_callback, data); + (void) zfs_iter_filesystems(zhp, cb->cb_flags, + zfs_callback, data); } if (((zfs_get_type(zhp) & (ZFS_TYPE_SNAPSHOT | ZFS_TYPE_BOOKMARK)) == 0) && include_snaps) { - (void) zfs_iter_snapshots(zhp, - (cb->cb_flags & ZFS_ITER_SIMPLE) != 0, + (void) zfs_iter_snapshots(zhp, cb->cb_flags, zfs_callback, data, 0, 0); } if (((zfs_get_type(zhp) & (ZFS_TYPE_SNAPSHOT | ZFS_TYPE_BOOKMARK)) == 0) && include_bmarks) { - (void) zfs_iter_bookmarks(zhp, zfs_callback, data); + (void) zfs_iter_bookmarks(zhp, cb->cb_flags, + zfs_callback, data); } cb->cb_depth--; @@ -211,18 +212,58 @@ zfs_free_sort_columns(zfs_sort_column_t *sc) } } -int -zfs_sort_only_by_name(const zfs_sort_column_t *sc) +/* + * Return true if all of the properties to be sorted are populated by + * dsl_dataset_fast_stat(). Note that sc == NULL (no sort) means we + * don't need any extra properties, so returns true. 
+ */ +boolean_t +zfs_sort_only_by_fast(const zfs_sort_column_t *sc) { - return (sc != NULL && sc->sc_next == NULL && - sc->sc_prop == ZFS_PROP_NAME); + while (sc != NULL) { + switch (sc->sc_prop) { + case ZFS_PROP_NAME: + case ZFS_PROP_GUID: + case ZFS_PROP_CREATETXG: + case ZFS_PROP_NUMCLONES: + case ZFS_PROP_INCONSISTENT: + case ZFS_PROP_REDACTED: + case ZFS_PROP_ORIGIN: + break; + default: + return (B_FALSE); + } + sc = sc->sc_next; + } + + return (B_TRUE); } -int -zfs_sort_only_by_createtxg(const zfs_sort_column_t *sc) +boolean_t +zfs_list_only_by_fast(const zprop_list_t *p) { - return (sc != NULL && sc->sc_next == NULL && - sc->sc_prop == ZFS_PROP_CREATETXG); + if (p == NULL) { + /* NULL means 'all' so we can't use simple mode */ + return (B_FALSE); + } + + while (p != NULL) { + switch (p->pl_prop) { + case ZFS_PROP_NAME: + case ZFS_PROP_GUID: + case ZFS_PROP_CREATETXG: + case ZFS_PROP_NUMCLONES: + case ZFS_PROP_INCONSISTENT: + case ZFS_PROP_REDACTED: + case ZFS_PROP_ORIGIN: + break; + default: + return (B_FALSE); + } + p = p->pl_next; + } + + return (B_TRUE); } static int diff --git a/sys/contrib/openzfs/cmd/zfs/zfs_iter.h b/sys/contrib/openzfs/cmd/zfs/zfs_iter.h index effb22ded3fc..d742ec7b2ec4 100644 --- a/sys/contrib/openzfs/cmd/zfs/zfs_iter.h +++ b/sys/contrib/openzfs/cmd/zfs/zfs_iter.h @@ -40,20 +40,12 @@ typedef struct zfs_sort_column { boolean_t sc_reverse; } zfs_sort_column_t; -#define ZFS_ITER_RECURSE (1 << 0) -#define ZFS_ITER_ARGS_CAN_BE_PATHS (1 << 1) -#define ZFS_ITER_PROP_LISTSNAPS (1 << 2) -#define ZFS_ITER_DEPTH_LIMIT (1 << 3) -#define ZFS_ITER_RECVD_PROPS (1 << 4) -#define ZFS_ITER_LITERAL_PROPS (1 << 5) -#define ZFS_ITER_SIMPLE (1 << 6) - int zfs_for_each(int, char **, int options, zfs_type_t, zfs_sort_column_t *, zprop_list_t **, int, zfs_iter_f, void *); int zfs_add_sort_column(zfs_sort_column_t **, const char *, boolean_t); void zfs_free_sort_columns(zfs_sort_column_t *); -int zfs_sort_only_by_name(const zfs_sort_column_t *); -int 
zfs_sort_only_by_createtxg(const zfs_sort_column_t *); +boolean_t zfs_sort_only_by_fast(const zfs_sort_column_t *); +boolean_t zfs_list_only_by_fast(const zprop_list_t *); #ifdef __cplusplus } diff --git a/sys/contrib/openzfs/cmd/zfs/zfs_main.c b/sys/contrib/openzfs/cmd/zfs/zfs_main.c index 2acd4aaf7338..5880343a92f3 100644 --- a/sys/contrib/openzfs/cmd/zfs/zfs_main.c +++ b/sys/contrib/openzfs/cmd/zfs/zfs_main.c @@ -327,15 +327,15 @@ get_usage(zfs_help_t idx) case HELP_ROLLBACK: return (gettext("\trollback [-rRf] \n")); case HELP_SEND: - return (gettext("\tsend [-DLPbcehnpsvw] " + return (gettext("\tsend [-DLPbcehnpsVvw] " "[-i|-I snapshot]\n" "\t [-R [-X dataset[,dataset]...]] \n" - "\tsend [-DnvPLecw] [-i snapshot|bookmark] " + "\tsend [-DnVvPLecw] [-i snapshot|bookmark] " "\n" - "\tsend [-DnPpvLec] [-i bookmark|snapshot] " + "\tsend [-DnPpVvLec] [-i bookmark|snapshot] " "--redact \n" - "\tsend [-nvPe] -t \n" - "\tsend [-Pnv] --saved filesystem\n")); + "\tsend [-nVvPe] -t \n" + "\tsend [-PnVv] --saved filesystem\n")); case HELP_SET: return (gettext("\tset ... 
" " ...\n")); @@ -1531,7 +1531,7 @@ destroy_print_snapshots(zfs_handle_t *fs_zhp, destroy_cbdata_t *cb) int err; assert(cb->cb_firstsnap == NULL); assert(cb->cb_prevsnap == NULL); - err = zfs_iter_snapshots_sorted(fs_zhp, destroy_print_cb, cb, 0, 0); + err = zfs_iter_snapshots_sorted(fs_zhp, 0, destroy_print_cb, cb, 0, 0); if (cb->cb_firstsnap != NULL) { uint64_t used = 0; if (err == 0) { @@ -1557,7 +1557,7 @@ snapshot_to_nvl_cb(zfs_handle_t *zhp, void *arg) if (!cb->cb_doclones && !cb->cb_defer_destroy) { cb->cb_target = zhp; cb->cb_first = B_TRUE; - err = zfs_iter_dependents(zhp, B_TRUE, + err = zfs_iter_dependents(zhp, 0, B_TRUE, destroy_check_dependent, cb); } @@ -1575,7 +1575,8 @@ gather_snapshots(zfs_handle_t *zhp, void *arg) destroy_cbdata_t *cb = arg; int err = 0; - err = zfs_iter_snapspec(zhp, cb->cb_snapspec, snapshot_to_nvl_cb, cb); + err = zfs_iter_snapspec(zhp, 0, cb->cb_snapspec, + snapshot_to_nvl_cb, cb); if (err == ENOENT) err = 0; if (err != 0) @@ -1588,7 +1589,7 @@ gather_snapshots(zfs_handle_t *zhp, void *arg) } if (cb->cb_recurse) - err = zfs_iter_filesystems(zhp, gather_snapshots, cb); + err = zfs_iter_filesystems(zhp, 0, gather_snapshots, cb); out: zfs_close(zhp); @@ -1613,7 +1614,7 @@ destroy_clones(destroy_cbdata_t *cb) * false while destroying the clones. 
*/ cb->cb_defer_destroy = B_FALSE; - err = zfs_iter_dependents(zhp, B_FALSE, + err = zfs_iter_dependents(zhp, 0, B_FALSE, destroy_callback, cb); cb->cb_defer_destroy = defer; zfs_close(zhp); @@ -1824,7 +1825,7 @@ zfs_do_destroy(int argc, char **argv) */ cb.cb_first = B_TRUE; if (!cb.cb_doclones && - zfs_iter_dependents(zhp, B_TRUE, destroy_check_dependent, + zfs_iter_dependents(zhp, 0, B_TRUE, destroy_check_dependent, &cb) != 0) { rv = 1; goto out; @@ -1835,7 +1836,7 @@ zfs_do_destroy(int argc, char **argv) goto out; } cb.cb_batchedsnaps = fnvlist_alloc(); - if (zfs_iter_dependents(zhp, B_FALSE, destroy_callback, + if (zfs_iter_dependents(zhp, 0, B_FALSE, destroy_callback, &cb) != 0) { rv = 1; goto out; @@ -3659,16 +3660,6 @@ found3:; argc -= optind; argv += optind; - /* - * If we are only going to list snapshot names and sort by name or - * by createtxg, then we can use faster version. - */ - if (strcmp(fields, "name") == 0 && - (zfs_sort_only_by_name(sortcol) || - zfs_sort_only_by_createtxg(sortcol))) { - flags |= ZFS_ITER_SIMPLE; - } - /* * If "-o space" and no types were specified, don't display snapshots. */ @@ -3696,6 +3687,15 @@ found3:; cb.cb_first = B_TRUE; + /* + * If we are only going to list and sort by properties that are "fast" + * then we can use "simple" mode and avoid populating the properties + * nvlist. 
+ */ + if (zfs_list_only_by_fast(cb.cb_proplist) && + zfs_sort_only_by_fast(sortcol)) + flags |= ZFS_ITER_SIMPLE; + ret = zfs_for_each(argc, argv, flags, types, sortcol, &cb.cb_proplist, limit, list_callback, &cb); @@ -4006,7 +4006,7 @@ rollback_check(zfs_handle_t *zhp, void *data) } if (cbp->cb_recurse) { - if (zfs_iter_dependents(zhp, B_TRUE, + if (zfs_iter_dependents(zhp, 0, B_TRUE, rollback_check_dependent, cbp) != 0) { zfs_close(zhp); return (-1); @@ -4105,10 +4105,10 @@ zfs_do_rollback(int argc, char **argv) if (cb.cb_create > 0) min_txg = cb.cb_create; - if ((ret = zfs_iter_snapshots(zhp, B_FALSE, rollback_check, &cb, + if ((ret = zfs_iter_snapshots(zhp, 0, rollback_check, &cb, min_txg, 0)) != 0) goto out; - if ((ret = zfs_iter_bookmarks(zhp, rollback_check, &cb)) != 0) + if ((ret = zfs_iter_bookmarks(zhp, 0, rollback_check, &cb)) != 0) goto out; if ((ret = cb.cb_error) != 0) @@ -4250,7 +4250,7 @@ zfs_snapshot_cb(zfs_handle_t *zhp, void *arg) free(name); if (sd->sd_recursive) - rv = zfs_iter_filesystems(zhp, zfs_snapshot_cb, sd); + rv = zfs_iter_filesystems(zhp, 0, zfs_snapshot_cb, sd); zfs_close(zhp); return (rv); } @@ -4388,6 +4388,7 @@ zfs_do_send(int argc, char **argv) {"props", no_argument, NULL, 'p'}, {"parsable", no_argument, NULL, 'P'}, {"dedup", no_argument, NULL, 'D'}, + {"proctitle", no_argument, NULL, 'V'}, {"verbose", no_argument, NULL, 'v'}, {"dryrun", no_argument, NULL, 'n'}, {"large-block", no_argument, NULL, 'L'}, @@ -4403,7 +4404,7 @@ zfs_do_send(int argc, char **argv) }; /* check options */ - while ((c = getopt_long(argc, argv, ":i:I:RsDpvnPLeht:cwbd:SX:", + while ((c = getopt_long(argc, argv, ":i:I:RsDpVvnPLeht:cwbd:SX:", long_options, NULL)) != -1) { switch (c) { case 'X': @@ -4452,6 +4453,9 @@ zfs_do_send(int argc, char **argv) case 'P': flags.parsable = B_TRUE; break; + case 'V': + flags.progressastitle = B_TRUE; + break; case 'v': flags.verbosity++; flags.progress = B_TRUE; @@ -6310,7 +6314,7 @@ zfs_do_allow_unallow_impl(int argc, 
char **argv, boolean_t un) if (un && opts.recursive) { struct deleg_perms data = { un, update_perm_nvl }; - if (zfs_iter_filesystems(zhp, set_deleg_perms, + if (zfs_iter_filesystems(zhp, 0, set_deleg_perms, &data) != 0) goto cleanup0; } @@ -6688,7 +6692,7 @@ get_one_dataset(zfs_handle_t *zhp, void *data) /* * Iterate over any nested datasets. */ - if (zfs_iter_filesystems(zhp, get_one_dataset, data) != 0) { + if (zfs_iter_filesystems(zhp, 0, get_one_dataset, data) != 0) { zfs_close(zhp); return (1); } @@ -8668,6 +8672,7 @@ main(int argc, char **argv) int i = 0; const char *cmdname; char **newargv; + extern char **environ; (void) setlocale(LC_ALL, ""); (void) setlocale(LC_NUMERIC, "C"); @@ -8725,6 +8730,8 @@ main(int argc, char **argv) libzfs_print_on_error(g_zfs, B_TRUE); + zfs_setproctitle_init(argc, argv, environ); + /* * Many commands modify input strings for string parsing reasons. * We create a copy to protect the original argv. diff --git a/sys/contrib/openzfs/cmd/zhack.c b/sys/contrib/openzfs/cmd/zhack.c index a1063ab147e2..0b6da31ec573 100644 --- a/sys/contrib/openzfs/cmd/zhack.c +++ b/sys/contrib/openzfs/cmd/zhack.c @@ -30,6 +30,7 @@ * result in corrupted pools. 
*/ +#include #include #include #include @@ -646,8 +647,6 @@ zhack_do_label(int argc, char **argv) int main(int argc, char **argv) { - extern void zfs_prop_init(void); - char *path[MAX_NUM_PATHS]; const char *subcommand; int rv = 0; diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_main.c b/sys/contrib/openzfs/cmd/zpool/zpool_main.c index 0b55bf21f448..93d6a18981cb 100644 --- a/sys/contrib/openzfs/cmd/zpool/zpool_main.c +++ b/sys/contrib/openzfs/cmd/zpool/zpool_main.c @@ -421,7 +421,8 @@ get_usage(zpool_help_t idx) return (gettext("\tget [-Hp] [-o \"all\" | field[,...]] " "<\"all\" | property[,...]> ...\n")); case HELP_SET: - return (gettext("\tset \n")); + return (gettext("\tset \n" + "\tset \n")); case HELP_SPLIT: return (gettext("\tsplit [-gLnPl] [-R altroot] [-o mntopts]\n" "\t [-o property=value] " @@ -5184,21 +5185,14 @@ get_stat_flags(zpool_list_t *list) static int is_vdev_cb(void *zhp_data, nvlist_t *nv, void *cb_data) { + uint64_t guid; vdev_cbdata_t *cb = cb_data; - char *name = NULL; - int ret = 1; /* assume match */ zpool_handle_t *zhp = zhp_data; - name = zpool_vdev_name(g_zfs, zhp, nv, cb->cb_name_flags); + if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0) + return (0); - if (strcmp(name, cb->cb_names[0])) { - free(name); - name = zpool_vdev_name(g_zfs, zhp, nv, VDEV_NAME_GUID); - ret = (strcmp(name, cb->cb_names[0]) == 0); - } - free(name); - - return (ret); + return (guid == zpool_vdev_path_to_guid(zhp, cb->cb_names[0])); } /* @@ -5429,7 +5423,13 @@ print_zpool_dir_scripts(char *dirpath) if ((dir = opendir(dirpath)) != NULL) { /* print all the files and directories within directory */ while ((ent = readdir(dir)) != NULL) { - sprintf(fullpath, "%s/%s", dirpath, ent->d_name); + if (snprintf(fullpath, sizeof (fullpath), "%s/%s", + dirpath, ent->d_name) >= sizeof (fullpath)) { + (void) fprintf(stderr, + gettext("internal error: " + "ZPOOL_SCRIPTS_PATH too large.\n")); + exit(1); + } /* Print the scripts */ if (stat(fullpath, &dir_stat) == 0) @@ 
-8593,37 +8593,17 @@ status_callback(zpool_handle_t *zhp, void *data) if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT, &nerr) == 0) { - nvlist_t *nverrlist = NULL; - - /* - * If the approximate error count is small, get a - * precise count by fetching the entire log and - * uniquifying the results. - */ - if (nerr > 0 && nerr < 100 && !cbp->cb_verbose && - zpool_get_errlog(zhp, &nverrlist) == 0) { - nvpair_t *elem; - - elem = NULL; - nerr = 0; - while ((elem = nvlist_next_nvpair(nverrlist, - elem)) != NULL) { - nerr++; - } - } - nvlist_free(nverrlist); - (void) printf("\n"); - - if (nerr == 0) - (void) printf(gettext("errors: No known data " - "errors\n")); - else if (!cbp->cb_verbose) + if (nerr == 0) { + (void) printf(gettext( + "errors: No known data errors\n")); + } else if (!cbp->cb_verbose) { (void) printf(gettext("errors: %llu data " "errors, use '-v' for a list\n"), (u_longlong_t)nerr); - else + } else { print_error_log(zhp); + } } if (cbp->cb_dedup_stats) @@ -8802,7 +8782,7 @@ check_unsupp_fs(zfs_handle_t *zhp, void *unsupp_fs) (*count)++; } - zfs_iter_filesystems(zhp, check_unsupp_fs, unsupp_fs); + zfs_iter_filesystems(zhp, 0, check_unsupp_fs, unsupp_fs); zfs_close(zhp); @@ -10348,29 +10328,27 @@ zpool_do_set(int argc, char **argv) argc -= 2; argv += 2; - if (are_vdevs_in_pool(argc, argv, NULL, &cb.cb_vdevs)) { - /* Argument is a vdev */ - cb.cb_vdevs.cb_names = argv; - cb.cb_vdevs.cb_names_count = 1; - cb.cb_type = ZFS_TYPE_VDEV; - argc = 0; /* No pools to process */ - } else if (are_all_pools(1, argv)) { - /* The first arg is a pool name */ - if (are_vdevs_in_pool(argc - 1, argv + 1, argv[0], - &cb.cb_vdevs)) { - /* 2nd argument is a vdev */ - cb.cb_vdevs.cb_names = argv + 1; - cb.cb_vdevs.cb_names_count = 1; - cb.cb_type = ZFS_TYPE_VDEV; - argc = 1; /* One pool to process */ - } else if (argc > 1) { - (void) fprintf(stderr, - gettext("too many pool names\n")); - usage(B_FALSE); - } + /* argv[0] is pool name */ + if (!is_pool(argv[0])) { + (void) 
fprintf(stderr, + gettext("cannot open '%s': is not a pool\n"), argv[0]); + return (EINVAL); } - error = for_each_pool(argc, argv, B_TRUE, NULL, ZFS_TYPE_POOL, + /* argv[1], when supplied, is vdev name */ + if (argc == 2) { + if (!are_vdevs_in_pool(1, argv + 1, argv[0], &cb.cb_vdevs)) { + (void) fprintf(stderr, gettext( + "cannot find '%s' in '%s': device not in pool\n"), + argv[1], argv[0]); + return (EINVAL); + } + cb.cb_vdevs.cb_names = argv + 1; + cb.cb_vdevs.cb_names_count = 1; + cb.cb_type = ZFS_TYPE_VDEV; + } + + error = for_each_pool(1, argv, B_TRUE, NULL, ZFS_TYPE_POOL, B_FALSE, set_callback, &cb); return (error); diff --git a/sys/contrib/openzfs/cmd/zstream/zstream_decompress.c b/sys/contrib/openzfs/cmd/zstream/zstream_decompress.c index 6e0da0852b72..0cef36c0441f 100644 --- a/sys/contrib/openzfs/cmd/zstream/zstream_decompress.c +++ b/sys/contrib/openzfs/cmd/zstream/zstream_decompress.c @@ -158,6 +158,8 @@ zstream_do_decompress(int argc, char *argv[]) } fletcher_4_init(); + int begin = 0; + boolean_t seen = B_FALSE; while (sfread(drr, sizeof (*drr), stdin) != 0) { struct drr_write *drrw; uint64_t payload_size = 0; @@ -174,8 +176,13 @@ zstream_do_decompress(int argc, char *argv[]) case DRR_BEGIN: { ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0); + VERIFY0(begin++); + seen = B_TRUE; + + uint32_t sz = drr->drr_payloadlen; + + VERIFY3U(sz, <=, 1U << 28); - int sz = drr->drr_payloadlen; if (sz != 0) { if (sz > bufsz) { buf = realloc(buf, sz); @@ -191,6 +198,13 @@ zstream_do_decompress(int argc, char *argv[]) case DRR_END: { struct drr_end *drre = &drr->drr_u.drr_end; + /* + * We would prefer to just check --begin == 0, but + * replication streams have an end of stream END + * record, so we must avoid tripping it. 
+ */ + VERIFY3B(seen, ==, B_TRUE); + begin--; /* * Use the recalculated checksum, unless this is * the END record of a stream package, which has @@ -204,6 +218,7 @@ zstream_do_decompress(int argc, char *argv[]) case DRR_OBJECT: { struct drr_object *drro = &drr->drr_u.drr_object; + VERIFY3S(begin, ==, 1); if (drro->drr_bonuslen > 0) { payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro); @@ -215,12 +230,14 @@ zstream_do_decompress(int argc, char *argv[]) case DRR_SPILL: { struct drr_spill *drrs = &drr->drr_u.drr_spill; + VERIFY3S(begin, ==, 1); payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs); (void) sfread(buf, payload_size, stdin); break; } case DRR_WRITE_BYREF: + VERIFY3S(begin, ==, 1); fprintf(stderr, "Deduplicated streams are not supported\n"); exit(1); @@ -228,6 +245,7 @@ zstream_do_decompress(int argc, char *argv[]) case DRR_WRITE: { + VERIFY3S(begin, ==, 1); drrw = &thedrr.drr_u.drr_write; payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw); ENTRY *p; @@ -321,6 +339,7 @@ zstream_do_decompress(int argc, char *argv[]) case DRR_WRITE_EMBEDDED: { + VERIFY3S(begin, ==, 1); struct drr_write_embedded *drrwe = &drr->drr_u.drr_write_embedded; payload_size = @@ -332,6 +351,7 @@ zstream_do_decompress(int argc, char *argv[]) case DRR_FREEOBJECTS: case DRR_FREE: case DRR_OBJECT_RANGE: + VERIFY3S(begin, ==, 1); break; default: diff --git a/sys/contrib/openzfs/cmd/zstream/zstream_recompress.c b/sys/contrib/openzfs/cmd/zstream/zstream_recompress.c index b7370587fc6a..8392ef3de72f 100644 --- a/sys/contrib/openzfs/cmd/zstream/zstream_recompress.c +++ b/sys/contrib/openzfs/cmd/zstream/zstream_recompress.c @@ -138,6 +138,8 @@ zstream_do_recompress(int argc, char *argv[]) fletcher_4_init(); zio_init(); zstd_init(); + int begin = 0; + boolean_t seen = B_FALSE; while (sfread(drr, sizeof (*drr), stdin) != 0) { struct drr_write *drrw; uint64_t payload_size = 0; @@ -155,8 +157,13 @@ zstream_do_recompress(int argc, char *argv[]) case DRR_BEGIN: { ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0); + 
VERIFY0(begin++); + seen = B_TRUE; + + uint32_t sz = drr->drr_payloadlen; + + VERIFY3U(sz, <=, 1U << 28); - int sz = drr->drr_payloadlen; if (sz != 0) { if (sz > bufsz) { buf = realloc(buf, sz); @@ -172,6 +179,13 @@ zstream_do_recompress(int argc, char *argv[]) case DRR_END: { struct drr_end *drre = &drr->drr_u.drr_end; + /* + * We would prefer to just check --begin == 0, but + * replication streams have an end of stream END + * record, so we must avoid tripping it. + */ + VERIFY3B(seen, ==, B_TRUE); + begin--; /* * Use the recalculated checksum, unless this is * the END record of a stream package, which has @@ -185,6 +199,7 @@ zstream_do_recompress(int argc, char *argv[]) case DRR_OBJECT: { struct drr_object *drro = &drr->drr_u.drr_object; + VERIFY3S(begin, ==, 1); if (drro->drr_bonuslen > 0) { payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro); @@ -196,12 +211,14 @@ zstream_do_recompress(int argc, char *argv[]) case DRR_SPILL: { struct drr_spill *drrs = &drr->drr_u.drr_spill; + VERIFY3S(begin, ==, 1); payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs); (void) sfread(buf, payload_size, stdin); break; } case DRR_WRITE_BYREF: + VERIFY3S(begin, ==, 1); fprintf(stderr, "Deduplicated streams are not supported\n"); exit(1); @@ -209,6 +226,7 @@ zstream_do_recompress(int argc, char *argv[]) case DRR_WRITE: { + VERIFY3S(begin, ==, 1); drrw = &thedrr.drr_u.drr_write; payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw); /* @@ -295,6 +313,7 @@ zstream_do_recompress(int argc, char *argv[]) { struct drr_write_embedded *drrwe = &drr->drr_u.drr_write_embedded; + VERIFY3S(begin, ==, 1); payload_size = P2ROUNDUP((uint64_t)drrwe->drr_psize, 8); (void) sfread(buf, payload_size, stdin); @@ -304,6 +323,7 @@ zstream_do_recompress(int argc, char *argv[]) case DRR_FREEOBJECTS: case DRR_FREE: case DRR_OBJECT_RANGE: + VERIFY3S(begin, ==, 1); break; default: diff --git a/sys/contrib/openzfs/cmd/zstream/zstream_redup.c b/sys/contrib/openzfs/cmd/zstream/zstream_redup.c index 5807fabcecb5..c56a09cee75d 100644 --- 
a/sys/contrib/openzfs/cmd/zstream/zstream_redup.c +++ b/sys/contrib/openzfs/cmd/zstream/zstream_redup.c @@ -222,6 +222,8 @@ zfs_redup_stream(int infd, int outfd, boolean_t verbose) char *buf = safe_calloc(bufsz); FILE *ofp = fdopen(infd, "r"); long offset = ftell(ofp); + int begin = 0; + boolean_t seen = B_FALSE; while (sfread(drr, sizeof (*drr), ofp) != 0) { num_records++; @@ -240,6 +242,8 @@ zfs_redup_stream(int infd, int outfd, boolean_t verbose) struct drr_begin *drrb = &drr->drr_u.drr_begin; int fflags; ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0); + VERIFY0(begin++); + seen = B_TRUE; assert(drrb->drr_magic == DMU_BACKUP_MAGIC); @@ -250,7 +254,10 @@ zfs_redup_stream(int infd, int outfd, boolean_t verbose) /* cppcheck-suppress syntaxError */ DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags); - int sz = drr->drr_payloadlen; + uint32_t sz = drr->drr_payloadlen; + + VERIFY3U(sz, <=, 1U << 28); + if (sz != 0) { if (sz > bufsz) { free(buf); @@ -266,6 +273,13 @@ zfs_redup_stream(int infd, int outfd, boolean_t verbose) case DRR_END: { struct drr_end *drre = &drr->drr_u.drr_end; + /* + * We would prefer to just check --begin == 0, but + * replication streams have an end of stream END + * record, so we must avoid tripping it. 
+ */ + VERIFY3B(seen, ==, B_TRUE); + begin--; /* * Use the recalculated checksum, unless this is * the END record of a stream package, which has @@ -279,6 +293,7 @@ zfs_redup_stream(int infd, int outfd, boolean_t verbose) case DRR_OBJECT: { struct drr_object *drro = &drr->drr_u.drr_object; + VERIFY3S(begin, ==, 1); if (drro->drr_bonuslen > 0) { payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro); @@ -290,6 +305,7 @@ zfs_redup_stream(int infd, int outfd, boolean_t verbose) case DRR_SPILL: { struct drr_spill *drrs = &drr->drr_u.drr_spill; + VERIFY3S(begin, ==, 1); payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs); (void) sfread(buf, payload_size, ofp); break; @@ -299,6 +315,7 @@ zfs_redup_stream(int infd, int outfd, boolean_t verbose) { struct drr_write_byref drrwb = drr->drr_u.drr_write_byref; + VERIFY3S(begin, ==, 1); num_write_byref_records++; @@ -334,6 +351,7 @@ zfs_redup_stream(int infd, int outfd, boolean_t verbose) case DRR_WRITE: { struct drr_write *drrw = &drr->drr_u.drr_write; + VERIFY3S(begin, ==, 1); payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw); (void) sfread(buf, payload_size, ofp); @@ -346,6 +364,7 @@ zfs_redup_stream(int infd, int outfd, boolean_t verbose) { struct drr_write_embedded *drrwe = &drr->drr_u.drr_write_embedded; + VERIFY3S(begin, ==, 1); payload_size = P2ROUNDUP((uint64_t)drrwe->drr_psize, 8); (void) sfread(buf, payload_size, ofp); @@ -355,6 +374,7 @@ zfs_redup_stream(int infd, int outfd, boolean_t verbose) case DRR_FREEOBJECTS: case DRR_FREE: case DRR_OBJECT_RANGE: + VERIFY3S(begin, ==, 1); break; default: diff --git a/sys/contrib/openzfs/cmd/ztest.c b/sys/contrib/openzfs/cmd/ztest.c index 1c7c74c5d80f..9dce486ee08c 100644 --- a/sys/contrib/openzfs/cmd/ztest.c +++ b/sys/contrib/openzfs/cmd/ztest.c @@ -443,7 +443,7 @@ static ztest_info_t ztest_info[] = { ZTI_INIT(ztest_dmu_commit_callbacks, 1, &zopt_always), ZTI_INIT(ztest_zap, 30, &zopt_always), ZTI_INIT(ztest_zap_parallel, 100, &zopt_always), - ZTI_INIT(ztest_split_pool, 1, &zopt_always), + 
ZTI_INIT(ztest_split_pool, 1, &zopt_sometimes), ZTI_INIT(ztest_zil_commit, 1, &zopt_incessant), ZTI_INIT(ztest_zil_remount, 1, &zopt_sometimes), ZTI_INIT(ztest_dmu_read_write_zcopy, 1, &zopt_often), @@ -1133,14 +1133,14 @@ process_options(int argc, char **argv) const char *invalid_what = "ztest"; char *val = zo->zo_alt_ztest; if (0 != access(val, X_OK) || - (strrchr(val, '/') == NULL && (errno = EINVAL))) + (strrchr(val, '/') == NULL && (errno == EINVAL))) goto invalid; int dirlen = strrchr(val, '/') - val; strlcpy(zo->zo_alt_libpath, val, MIN(sizeof (zo->zo_alt_libpath), dirlen + 1)); invalid_what = "library path", val = zo->zo_alt_libpath; - if (strrchr(val, '/') == NULL && (errno = EINVAL)) + if (strrchr(val, '/') == NULL && (errno == EINVAL)) goto invalid; *strrchr(val, '/') = '\0'; strlcat(val, "/lib", sizeof (zo->zo_alt_libpath)); @@ -2790,12 +2790,12 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset) err = ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_CHECKSUM, spa_dedup_checksum(ztest_spa), B_FALSE); - VERIFY(err == 0 || err == ENOSPC); + ASSERT(err == 0 || err == ENOSPC); err = ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_COMPRESSION, ztest_random_dsl_prop(ZFS_PROP_COMPRESSION), B_FALSE); - VERIFY(err == 0 || err == ENOSPC); + ASSERT(err == 0 || err == ENOSPC); (void) pthread_rwlock_unlock(&ztest_name_lock); VERIFY0(dmu_read(zd->zd_os, object, offset, blocksize, data, @@ -3347,8 +3347,9 @@ ztest_vdev_class_add(ztest_ds_t *zd, uint64_t id) spa_special_class(spa)->mc_groups == 1 && ztest_random(2) == 0) { if (ztest_opts.zo_verbose >= 3) (void) printf("Enabling special VDEV small blocks\n"); - (void) ztest_dsl_prop_set_uint64(zd->zd_name, + error = ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_SPECIAL_SMALL_BLOCKS, 32768, B_FALSE); + ASSERT(error == 0 || error == ENOSPC); } mutex_exit(&ztest_vdev_lock); @@ -3597,6 +3598,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) int newvd_is_spare = B_FALSE; int newvd_is_dspare = B_FALSE; int 
oldvd_is_log; + int oldvd_is_special; int error, expected_error; if (ztest_opts.zo_mmp_test) @@ -3671,6 +3673,9 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) oldguid = oldvd->vdev_guid; oldsize = vdev_get_min_asize(oldvd); oldvd_is_log = oldvd->vdev_top->vdev_islog; + oldvd_is_special = + oldvd->vdev_top->vdev_alloc_bias == VDEV_BIAS_SPECIAL || + oldvd->vdev_top->vdev_alloc_bias == VDEV_BIAS_DEDUP; (void) strlcpy(oldpath, oldvd->vdev_path, MAXPATHLEN); pvd = oldvd->vdev_parent; pguid = pvd->vdev_guid; @@ -3749,7 +3754,8 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) pvd->vdev_ops == &vdev_replacing_ops || pvd->vdev_ops == &vdev_spare_ops)) expected_error = ENOTSUP; - else if (newvd_is_spare && (!replacing || oldvd_is_log)) + else if (newvd_is_spare && + (!replacing || oldvd_is_log || oldvd_is_special)) expected_error = ENOTSUP; else if (newvd == oldvd) expected_error = replacing ? 0 : EBUSY; @@ -4293,7 +4299,7 @@ ztest_snapshot_create(char *osname, uint64_t id) ztest_record_enospc(FTAG); return (B_FALSE); } - if (error != 0 && error != EEXIST) { + if (error != 0 && error != EEXIST && error != ECHRNG) { fatal(B_FALSE, "ztest_snapshot_create(%s@%s) = %d", osname, snapname, error); } @@ -4310,7 +4316,7 @@ ztest_snapshot_destroy(char *osname, uint64_t id) osname, id); error = dsl_destroy_snapshot(snapname, B_FALSE); - if (error != 0 && error != ENOENT) + if (error != 0 && error != ENOENT && error != ECHRNG) fatal(B_FALSE, "ztest_snapshot_destroy(%s) = %d", snapname, error); return (B_TRUE); @@ -4359,9 +4365,16 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) /* * Verify that the destroyed dataset is no longer in the namespace. + * It may still be present if the destroy above fails with ENOSPC. 
*/ - VERIFY3U(ENOENT, ==, ztest_dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, - B_TRUE, FTAG, &os)); + error = ztest_dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, B_TRUE, + FTAG, &os); + if (error == 0) { + dmu_objset_disown(os, B_TRUE, FTAG); + ztest_record_enospc(FTAG); + goto out; + } + VERIFY3U(ENOENT, ==, error); /* * Verify that we can create a new dataset. @@ -5834,12 +5847,15 @@ ztest_dsl_prop_get_set(ztest_ds_t *zd, uint64_t id) (void) pthread_rwlock_rdlock(&ztest_name_lock); - for (int p = 0; p < sizeof (proplist) / sizeof (proplist[0]); p++) - (void) ztest_dsl_prop_set_uint64(zd->zd_name, proplist[p], + for (int p = 0; p < sizeof (proplist) / sizeof (proplist[0]); p++) { + int error = ztest_dsl_prop_set_uint64(zd->zd_name, proplist[p], ztest_random_dsl_prop(proplist[p]), (int)ztest_random(2)); + ASSERT(error == 0 || error == ENOSPC); + } - VERIFY0(ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_RECORDSIZE, - ztest_random_blocksize(), (int)ztest_random(2))); + int error = ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_RECORDSIZE, + ztest_random_blocksize(), (int)ztest_random(2)); + ASSERT(error == 0 || error == ENOSPC); (void) pthread_rwlock_unlock(&ztest_name_lock); } @@ -6313,7 +6329,7 @@ ztest_scrub_impl(spa_t *spa) while (dsl_scan_scrubbing(spa_get_dsl(spa))) txg_wait_synced(spa_get_dsl(spa), 0); - if (spa_get_errlog_size(spa) > 0) + if (spa_approx_errlog_size(spa) > 0) return (ECKSUM); ztest_pool_scrubbed = B_TRUE; diff --git a/sys/contrib/openzfs/config/ax_python_devel.m4 b/sys/contrib/openzfs/config/ax_python_devel.m4 index e6cee04288b8..f6d4b01444d6 100644 --- a/sys/contrib/openzfs/config/ax_python_devel.m4 +++ b/sys/contrib/openzfs/config/ax_python_devel.m4 @@ -97,23 +97,13 @@ AC_DEFUN([AX_PYTHON_DEVEL],[ # Check for a version of Python >= 2.1.0 # AC_MSG_CHECKING([for a version of Python >= '2.1.0']) - ac_supports_python_ver=`cat<= '2.1.0')"` if test "$ac_supports_python_ver" != "True"; then if test -z "$PYTHON_NOVERSIONCHECK"; then 
AC_MSG_RESULT([no]) - m4_ifvaln([$2],[$2],[ - AC_MSG_FAILURE([ + AC_MSG_FAILURE([ This version of the AC@&t@_PYTHON_DEVEL macro doesn't work properly with versions of Python before 2.1.0. You may need to re-run configure, setting the @@ -122,7 +112,6 @@ PYTHON_EXTRA_LIBS and PYTHON_EXTRA_LDFLAGS by hand. Moreover, to disable this check, set PYTHON_NOVERSIONCHECK to something else than an empty string. ]) - ]) else AC_MSG_RESULT([skip at user request]) fi @@ -131,37 +120,47 @@ to something else than an empty string. fi # - # if the macro parameter ``version'' is set, honour it + # If the macro parameter ``version'' is set, honour it. + # A Python shim class, VPy, is used to implement correct version comparisons via + # string expressions, since e.g. a naive textual ">= 2.7.3" won't work for + # Python 2.7.10 (the ".1" being evaluated as less than ".3"). # if test -n "$1"; then AC_MSG_CHECKING([for a version of Python $1]) - # Why the strip ()? Because if we don't, version.parse - # will, for example, report 3.10.0 >= '3.11.0' - ac_supports_python_ver=`cat< ax_python_devel_vpy.py +class VPy: + def vtup(self, s): + return tuple(map(int, s.strip().replace("rc", ".").split("."))) + def __init__(self): + import sys + self.vpy = tuple(sys.version_info) + def __eq__(self, s): + return self.vpy == self.vtup(s) + def __ne__(self, s): + return self.vpy != self.vtup(s) + def __lt__(self, s): + return self.vpy < self.vtup(s) + def __gt__(self, s): + return self.vpy > self.vtup(s) + def __le__(self, s): + return self.vpy <= self.vtup(s) + def __ge__(self, s): + return self.vpy >= self.vtup(s) +EOF + ac_supports_python_ver=`$PYTHON -c "import ax_python_devel_vpy; \ + ver = ax_python_devel_vpy.VPy(); \ + print (ver $1)"` + rm -rf ax_python_devel_vpy*.py* __pycache__/ax_python_devel_vpy*.py* if test "$ac_supports_python_ver" = "True"; then - AC_MSG_RESULT([yes]) + AC_MSG_RESULT([yes]) else AC_MSG_RESULT([no]) - m4_ifvaln([$2],[$2],[ - AC_MSG_ERROR([this package requires Python $1. 
+ AC_MSG_ERROR([this package requires Python $1. If you have it installed, but it isn't the default Python interpreter in your system path, please pass the PYTHON_VERSION variable to configure. See ``configure --help'' for reference. ]) - PYTHON_VERSION="" - ]) + PYTHON_VERSION="" fi fi diff --git a/sys/contrib/openzfs/config/deb.am b/sys/contrib/openzfs/config/deb.am index 0033dd7591ff..1379e58c40a8 100644 --- a/sys/contrib/openzfs/config/deb.am +++ b/sys/contrib/openzfs/config/deb.am @@ -1,14 +1,17 @@ -PHONY += deb-kmod deb-dkms deb-utils deb deb-local +PHONY += deb-kmod deb-dkms deb-utils deb deb-local native-deb-local \ + native-deb-utils native-deb-kmod native-deb -deb-local: +native-deb-local: @(if test "${HAVE_DPKGBUILD}" = "no"; then \ echo -e "\n" \ "*** Required util ${DPKGBUILD} missing. Please install the\n" \ "*** package for your distribution which provides ${DPKGBUILD},\n" \ "*** re-run configure, and try again.\n"; \ exit 1; \ - fi; \ - if test "${HAVE_ALIEN}" = "no"; then \ + fi) + +deb-local: native-deb-local + @(if test "${HAVE_ALIEN}" = "no"; then \ echo -e "\n" \ "*** Required util ${ALIEN} missing. Please install the\n" \ "*** package for your distribution which provides ${ALIEN},\n" \ @@ -67,7 +70,7 @@ deb-utils: deb-local rpm-utils-initramfs ## to do this, so we install a shim onto the path which calls the real ## dh_shlibdeps with the required arguments. 
path_prepend=`mktemp -d /tmp/intercept.XXXXXX`; \ - echo "#$(SHELL)" > $${path_prepend}/dh_shlibdeps; \ + echo "#!$(SHELL)" > $${path_prepend}/dh_shlibdeps; \ echo "`which dh_shlibdeps` -- \ -xlibuutil3linux -xlibnvpair3linux -xlibzfs5linux -xlibzpool5linux" \ >> $${path_prepend}/dh_shlibdeps; \ @@ -75,7 +78,7 @@ deb-utils: deb-local rpm-utils-initramfs ## Debianized packages from the auto-generated dependencies of the new debs, ## which should NOT be mixed with the alien-generated debs created here chmod +x $${path_prepend}/dh_shlibdeps; \ - env PATH=$${path_prepend}:$${PATH} \ + env "PATH=$${path_prepend}:$${PATH}" \ fakeroot $(ALIEN) --bump=0 --scripts --to-deb --target=$$debarch \ $$pkg1 $$pkg2 $$pkg3 $$pkg4 $$pkg5 $$pkg6 $$pkg7 \ $$pkg8 $$pkg9 $$pkg10 $$pkg11 || exit 1; \ @@ -85,3 +88,16 @@ deb-utils: deb-local rpm-utils-initramfs $$pkg8 $$pkg9 $$pkg10 $$pkg11; deb: deb-kmod deb-dkms deb-utils + +debian: + cp -r contrib/debian debian; chmod +x debian/rules; + +native-deb-utils: native-deb-local debian + cp contrib/debian/control debian/control; \ + $(DPKGBUILD) -b -rfakeroot -us -uc; + +native-deb-kmod: native-deb-local debian + sh scripts/make_gitrev.sh; \ + fakeroot debian/rules override_dh_binary-modules; + +native-deb: native-deb-utils native-deb-kmod diff --git a/sys/contrib/openzfs/config/kernel-acl.m4 b/sys/contrib/openzfs/config/kernel-acl.m4 index a155b59d006a..6e92da97d0fe 100644 --- a/sys/contrib/openzfs/config/kernel-acl.m4 +++ b/sys/contrib/openzfs/config/kernel-acl.m4 @@ -165,6 +165,9 @@ dnl # dnl # 5.15 API change, dnl # Added the bool rcu argument to get_acl for rcu path walk. 
dnl # +dnl # 6.2 API change, +dnl # get_acl() was renamed to get_inode_acl() +dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_GET_ACL], [ ZFS_LINUX_TEST_SRC([inode_operations_get_acl], [ #include @@ -189,6 +192,18 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_GET_ACL], [ .get_acl = get_acl_fn, }; ],[]) + + ZFS_LINUX_TEST_SRC([inode_operations_get_inode_acl], [ + #include + + struct posix_acl *get_inode_acl_fn(struct inode *inode, int type, + bool rcu) { return NULL; } + + static const struct inode_operations + iops __attribute__ ((unused)) = { + .get_inode_acl = get_inode_acl_fn, + }; + ],[]) ]) AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_GET_ACL], [ @@ -201,7 +216,12 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_GET_ACL], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_GET_ACL_RCU, 1, [iops->get_acl() takes rcu]) ],[ - ZFS_LINUX_TEST_ERROR([iops->get_acl()]) + ZFS_LINUX_TEST_RESULT([inode_operations_get_inode_acl], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_GET_INODE_ACL, 1, [has iops->get_inode_acl()]) + ],[ + ZFS_LINUX_TEST_ERROR([iops->get_acl() or iops->get_inode_acl()]) + ]) ]) ]) ]) @@ -213,7 +233,22 @@ dnl # dnl # 5.12 API change, dnl # set_acl() added a user_namespace* parameter first dnl # +dnl # 6.2 API change, +dnl # set_acl() second paramter changed to a struct dentry * +dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_SET_ACL], [ + ZFS_LINUX_TEST_SRC([inode_operations_set_acl_userns_dentry], [ + #include + + int set_acl_fn(struct user_namespace *userns, + struct dentry *dent, struct posix_acl *acl, + int type) { return 0; } + + static const struct inode_operations + iops __attribute__ ((unused)) = { + .set_acl = set_acl_fn, + }; + ],[]) ZFS_LINUX_TEST_SRC([inode_operations_set_acl_userns], [ #include @@ -246,11 +281,18 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_SET_ACL], [ AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists]) AC_DEFINE(HAVE_SET_ACL_USERNS, 1, [iops->set_acl() takes 4 args]) ],[ - ZFS_LINUX_TEST_RESULT([inode_operations_set_acl], [ + 
ZFS_LINUX_TEST_RESULT([inode_operations_set_acl_userns_dentry], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists, takes 3 args]) + AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists]) + AC_DEFINE(HAVE_SET_ACL_USERNS_DENTRY_ARG2, 1, + [iops->set_acl() takes 4 args, arg2 is struct dentry *]) ],[ - AC_MSG_RESULT(no) + ZFS_LINUX_TEST_RESULT([inode_operations_set_acl], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists, takes 3 args]) + ],[ + ZFS_LINUX_REQUIRE_API([i_op->set_acl()], [3.14]) + ]) ]) ]) ]) diff --git a/sys/contrib/openzfs/config/kernel-tmpfile.m4 b/sys/contrib/openzfs/config/kernel-tmpfile.m4 index 45c2e6ceea52..0e1deb3612f3 100644 --- a/sys/contrib/openzfs/config/kernel-tmpfile.m4 +++ b/sys/contrib/openzfs/config/kernel-tmpfile.m4 @@ -3,11 +3,25 @@ dnl # 3.11 API change dnl # Add support for i_op->tmpfile dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_TMPFILE], [ + dnl # + dnl # 6.1 API change + dnl # use struct file instead of struct dentry + dnl # + ZFS_LINUX_TEST_SRC([inode_operations_tmpfile], [ + #include + int tmpfile(struct user_namespace *userns, + struct inode *inode, struct file *file, + umode_t mode) { return 0; } + static struct inode_operations + iops __attribute__ ((unused)) = { + .tmpfile = tmpfile, + }; + ],[]) dnl # dnl # 5.11 API change dnl # add support for userns parameter to tmpfile dnl # - ZFS_LINUX_TEST_SRC([inode_operations_tmpfile_userns], [ + ZFS_LINUX_TEST_SRC([inode_operations_tmpfile_dentry_userns], [ #include int tmpfile(struct user_namespace *userns, struct inode *inode, struct dentry *dentry, @@ -17,7 +31,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_TMPFILE], [ .tmpfile = tmpfile, }; ],[]) - ZFS_LINUX_TEST_SRC([inode_operations_tmpfile], [ + ZFS_LINUX_TEST_SRC([inode_operations_tmpfile_dentry], [ #include int tmpfile(struct inode *inode, struct dentry *dentry, umode_t mode) { return 0; } @@ -30,16 +44,24 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_TMPFILE], [ AC_DEFUN([ZFS_AC_KERNEL_TMPFILE], [ 
AC_MSG_CHECKING([whether i_op->tmpfile() exists]) - ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_userns], [ + ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists]) AC_DEFINE(HAVE_TMPFILE_USERNS, 1, [i_op->tmpfile() has userns]) ],[ - ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile], [ + ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_dentry_userns], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists]) + AC_DEFINE(HAVE_TMPFILE_USERNS, 1, [i_op->tmpfile() has userns]) + AC_DEFINE(HAVE_TMPFILE_DENTRY, 1, [i_op->tmpfile() uses old dentry signature]) ],[ - AC_MSG_RESULT(no) + ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_dentry], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists]) + AC_DEFINE(HAVE_TMPFILE_DENTRY, 1, [i_op->tmpfile() uses old dentry signature]) + ],[ + ZFS_LINUX_REQUIRE_API([i_op->tmpfile()], [3.11]) + ]) ]) ]) ]) diff --git a/sys/contrib/openzfs/config/kernel.m4 b/sys/contrib/openzfs/config/kernel.m4 index c71d576f492e..353988e9c867 100644 --- a/sys/contrib/openzfs/config/kernel.m4 +++ b/sys/contrib/openzfs/config/kernel.m4 @@ -958,3 +958,35 @@ AC_DEFUN([ZFS_LINUX_TRY_COMPILE_HEADER], [ [test -f build/conftest/conftest.ko], [$3], [$4], [$5]) ]) ]) + +dnl # +dnl # AS_VERSION_COMPARE_LE +dnl # like AS_VERSION_COMPARE_LE, but runs $3 if (and only if) $1 <= $2 +dnl # AS_VERSION_COMPARE_LE (version-1, version-2, [action-if-less-or-equal], [action-if-greater]) +dnl # +AC_DEFUN([AS_VERSION_COMPARE_LE], [ + AS_VERSION_COMPARE([$1], [$2], [$3], [$3], [$4]) +]) + +dnl # +dnl # ZFS_LINUX_REQUIRE_API +dnl # like ZFS_LINUX_TEST_ERROR, except only fails if the kernel is +dnl # at least some specified version. +dnl # +AC_DEFUN([ZFS_LINUX_REQUIRE_API], [ + AS_VERSION_COMPARE_LE([$2], [$kernsrcver], [ + AC_MSG_ERROR([ + *** None of the expected "$1" interfaces were detected. This + *** interface is expected for kernels version "$2" and above. 
+ *** This may be because your kernel version is newer than what is + *** supported, or you are using a patched custom kernel with + *** incompatible modifications. Newer kernels may have incompatible + *** APIs. + *** + *** ZFS Version: $ZFS_META_ALIAS + *** Compatible Kernels: $ZFS_META_KVER_MIN - $ZFS_META_KVER_MAX + ]) + ], [ + AC_MSG_RESULT(no) + ]) +]) diff --git a/sys/contrib/openzfs/config/zfs-build.m4 b/sys/contrib/openzfs/config/zfs-build.m4 index bb3c81a647fe..2703e6c016c4 100644 --- a/sys/contrib/openzfs/config/zfs-build.m4 +++ b/sys/contrib/openzfs/config/zfs-build.m4 @@ -464,6 +464,7 @@ AC_DEFUN([ZFS_AC_DPKG], [ AC_SUBST(HAVE_DPKGBUILD) AC_SUBST(DPKGBUILD) AC_SUBST(DPKGBUILD_VERSION) + AC_SUBST([CFGOPTS], ["$CFGOPTS"]) ]) dnl # diff --git a/sys/contrib/openzfs/configure.ac b/sys/contrib/openzfs/configure.ac index 5cb25b32ae2c..4c75616e4299 100644 --- a/sys/contrib/openzfs/configure.ac +++ b/sys/contrib/openzfs/configure.ac @@ -32,6 +32,7 @@ AC_INIT(m4_esyscmd(awk '/^Name:/ {printf $2}' META), m4_esyscmd(awk '/^Version:/ {printf $2}' META)) +CFGOPTS="$*" AC_LANG(C) ZFS_AC_META AC_CONFIG_AUX_DIR([config]) @@ -65,6 +66,7 @@ ZFS_AC_DEBUG_KMEM_TRACKING ZFS_AC_DEBUG_INVARIANTS AC_CONFIG_FILES([ + contrib/debian/rules Makefile include/Makefile lib/libzfs/libzfs.pc diff --git a/sys/contrib/openzfs/contrib/debian/.gitignore b/sys/contrib/openzfs/contrib/debian/.gitignore new file mode 100644 index 000000000000..de7475d8888c --- /dev/null +++ b/sys/contrib/openzfs/contrib/debian/.gitignore @@ -0,0 +1 @@ +rules diff --git a/sys/contrib/openzfs/contrib/debian/control b/sys/contrib/openzfs/contrib/debian/control index a0db4985ed1a..b9bb23b09ba0 100644 --- a/sys/contrib/openzfs/contrib/debian/control +++ b/sys/contrib/openzfs/contrib/debian/control @@ -2,8 +2,7 @@ Source: openzfs-linux Section: contrib/kernel Priority: optional Maintainer: ZFS on Linux specific mailing list -Build-Depends: abigail-tools, - debhelper-compat (= 12), +Build-Depends: debhelper-compat 
(= 12), dh-python, dkms (>> 2.1.1.2-5), libaio-dev, diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-libpam-zfs.postinst b/sys/contrib/openzfs/contrib/debian/openzfs-libpam-zfs.postinst index 2db86744e4e6..03893454eee9 100644 --- a/sys/contrib/openzfs/contrib/debian/openzfs-libpam-zfs.postinst +++ b/sys/contrib/openzfs/contrib/debian/openzfs-libpam-zfs.postinst @@ -1,6 +1,8 @@ #!/bin/sh set -e -pam-auth-update --package +if ! $(ldd "/lib/$(dpkg-architecture -qDEB_HOST_MULTIARCH)/security/pam_zfs_key.so" | grep -q "libasan") ; then + pam-auth-update --package +fi #DEBHELPER# diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-zfs-zed.postinst b/sys/contrib/openzfs/contrib/debian/openzfs-zfs-zed.postinst index a615eec95760..ac14957a3fe1 100644 --- a/sys/contrib/openzfs/contrib/debian/openzfs-zfs-zed.postinst +++ b/sys/contrib/openzfs/contrib/debian/openzfs-zfs-zed.postinst @@ -4,13 +4,6 @@ set -e zedd="/usr/lib/zfs-linux/zed.d" etcd="/etc/zfs/zed.d" -# enable all default zedlets that are not overridden -while read -r file ; do - etcfile="${etcd}/${file}" - [ -e "${etcfile}" ] && continue - ln -sfT "${zedd}/${file}" "${etcfile}" -done < "${zedd}/DEFAULT-ENABLED" - # remove the overrides created in prerm find "${etcd}" -maxdepth 1 -lname '/dev/null' -delete # remove any dangling symlinks to old zedlets diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-zfs-zed.prerm b/sys/contrib/openzfs/contrib/debian/openzfs-zfs-zed.prerm deleted file mode 100644 index b8340df53438..000000000000 --- a/sys/contrib/openzfs/contrib/debian/openzfs-zfs-zed.prerm +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/sh -set -e - -zedd="/usr/lib/zfs-linux/zed.d" -etcd="/etc/zfs/zed.d" - -if [ "$1" != "failed-upgrade" ] && [ -d "${etcd}" ] && [ -d "${zedd}" ] ; then - while read -r file ; do - etcfile="${etcd}/${file}" - ( [ -L "${etcfile}" ] || [ -e "${etcfile}" ] ) && continue - ln -sT /dev/null "${etcfile}" - done < "${zedd}/DEFAULT-ENABLED" -fi - -#DEBHELPER# - diff --git 
a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install index e10a50e012c1..9c7b05451bab 100644 --- a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install +++ b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install @@ -131,5 +131,4 @@ usr/share/man/man8/zstreamdump.8 usr/share/man/man4/spl.4 usr/share/man/man4/zfs.4 usr/share/man/man7/zpool-features.7 -usr/share/man/man7/dracut.zfs.7 usr/share/man/man8/zpool_influxdb.8 diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.postinst b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.postinst index b13a78654c37..7dc208d0dd7b 100644 --- a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.postinst +++ b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.postinst @@ -1,21 +1,6 @@ #!/bin/sh set -e -# The hostname and hostid of the last system to access a ZFS pool are stored in -# the ZFS pool itself. A pool is foreign if, during `zpool import`, the -# current hostname and hostid are different than the stored values thereof. -# -# The only way of having a stable hostid is to define it in /etc/hostid. -# This postinst helper will check if we already have the hostid stabilized by -# checking the existence of the file /etc/hostid to be 4 bytes at least. -# If this file don't already exists on our system or has less than 4 bytes, then -# a new (random) value is generated with zgenhostid (8) and stored in -# /etc/hostid - -if [ ! -f /etc/hostid ] || [ "$(stat -c %s /etc/hostid)" -lt 4 ] ; then - zgenhostid -fi - # When processed to here but zfs kernel module is not loaded, the subsequent # services would fail to start. In this case the installation process just # fails at the postinst stage. 
The user could do diff --git a/sys/contrib/openzfs/contrib/debian/rules b/sys/contrib/openzfs/contrib/debian/rules.in similarity index 99% rename from sys/contrib/openzfs/contrib/debian/rules rename to sys/contrib/openzfs/contrib/debian/rules.in index 5f4889445bea..63892c6ca243 100755 --- a/sys/contrib/openzfs/contrib/debian/rules +++ b/sys/contrib/openzfs/contrib/debian/rules.in @@ -35,7 +35,7 @@ override_dh_autoreconf: override_dh_auto_configure: @# Build the userland, but don't build the kernel modules. - dh_auto_configure -- \ + dh_auto_configure -- @CFGOPTS@ \ --bindir=/usr/bin \ --sbindir=/sbin \ --libdir=/lib/"$(DEB_HOST_MULTIARCH)" \ @@ -195,7 +195,7 @@ override_dh_prep-deb-files: override_dh_configure_modules: override_dh_configure_modules_stamp override_dh_configure_modules_stamp: - ./configure \ + ./configure @CFGOPTS@ \ --with-config=kernel \ --with-linux=$(KSRC) \ --with-linux-obj=$(KOBJ) diff --git a/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-load-key.sh.in b/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-load-key.sh.in index d916f43b4e95..8e6846831e6f 100755 --- a/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-load-key.sh.in +++ b/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-load-key.sh.in @@ -34,7 +34,7 @@ _load_key_cb() { case "${KEYLOCATION%%://*}" in prompt) for _ in 1 2 3; do - systemd-ask-password --no-tty "Encrypted ZFS password for ${dataset}" | zfs load-key "${ENCRYPTIONROOT}" && break + systemd-ask-password --timeout=0 --no-tty "Encrypted ZFS password for ${dataset}" | zfs load-key "${ENCRYPTIONROOT}" && break done ;; http*) diff --git a/sys/contrib/openzfs/contrib/initramfs/scripts/zfs b/sys/contrib/openzfs/contrib/initramfs/scripts/zfs index 587dd5eba9ef..c724f0c2cf57 100644 --- a/sys/contrib/openzfs/contrib/initramfs/scripts/zfs +++ b/sys/contrib/openzfs/contrib/initramfs/scripts/zfs @@ -333,25 +333,21 @@ mount_fs() # Can't use the mountpoint property. Might be one of our # clones. 
Check the 'org.zol:mountpoint' property set in # clone_snap() if that's usable. - mountpoint=$(get_fs_value "$fs" org.zol:mountpoint) - if [ "$mountpoint" = "legacy" ] || - [ "$mountpoint" = "none" ] || - [ "$mountpoint" = "-" ] + mountpoint1=$(get_fs_value "$fs" org.zol:mountpoint) + if [ "$mountpoint1" = "legacy" ] || + [ "$mountpoint1" = "none" ] || + [ "$mountpoint1" = "-" ] then if [ "$fs" != "${ZFS_BOOTFS}" ]; then # We don't have a proper mountpoint and this # isn't the root fs. return 0 - else - # Last hail-mary: Hope 'rootmnt' is set! - mountpoint="" fi - fi - - # If it's not a legacy filesystem, it can only be a - # native one... - if [ "$mountpoint" = "legacy" ]; then ZFS_CMD="mount.zfs" + # Last hail-mary: Hope 'rootmnt' is set! + mountpoint="" + else + mountpoint="$mountpoint1" fi fi @@ -505,7 +501,7 @@ clone_snap() echo "Error: $ZFS_ERROR" echo "" echo "Failed to clone snapshot." - echo "Make sure that the any problems are corrected and then make sure" + echo "Make sure that any problems are corrected and then make sure" echo "that the dataset '$destfs' exists and is bootable." 
shell else diff --git a/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c b/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c index e3fa9e9b2553..99cdb8d7733f 100644 --- a/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c +++ b/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c @@ -535,7 +535,7 @@ zfs_key_config_get_dataset(zfs_key_config_t *config) return (NULL); } - (void) zfs_iter_filesystems(zhp, find_dsname_by_prop_value, + (void) zfs_iter_filesystems(zhp, 0, find_dsname_by_prop_value, config); zfs_close(zhp); char *dsname = config->dsname; diff --git a/sys/contrib/openzfs/etc/systemd/system-generators/zfs-mount-generator.c b/sys/contrib/openzfs/etc/systemd/system-generators/zfs-mount-generator.c index b07574e72afe..ab5dc4d78d60 100644 --- a/sys/contrib/openzfs/etc/systemd/system-generators/zfs-mount-generator.c +++ b/sys/contrib/openzfs/etc/systemd/system-generators/zfs-mount-generator.c @@ -224,9 +224,10 @@ line_worker(char *line, const char *cachefile) const char *p_systemd_ignore = strtok_r(NULL, "\t", &toktmp) ?: "-"; /* END CSTYLED */ - const char *pool = dataset; - if ((toktmp = strchr(pool, '/')) != NULL) - pool = strndupa(pool, toktmp - pool); + size_t pool_len = strlen(dataset); + if ((toktmp = strchr(dataset, '/')) != NULL) + pool_len = toktmp - dataset; + const char *pool = *(tofree++) = strndup(dataset, pool_len); if (p_nbmand == NULL) { fprintf(stderr, PROGNAME "[%d]: %s: not enough tokens!\n", @@ -734,7 +735,7 @@ line_worker(char *line, const char *cachefile) if (tofree >= tofree_all + nitems(tofree_all)) { /* * This won't happen as-is: - * we've got 8 slots and allocate 4 things at most. + * we've got 8 slots and allocate 5 things at most. 
*/ fprintf(stderr, PROGNAME "[%d]: %s: need to free %zu > %zu!\n", diff --git a/sys/contrib/openzfs/etc/systemd/system/zfs-zed.service.in b/sys/contrib/openzfs/etc/systemd/system/zfs-zed.service.in index 73a83e59e510..be2fc67348f9 100644 --- a/sys/contrib/openzfs/etc/systemd/system/zfs-zed.service.in +++ b/sys/contrib/openzfs/etc/systemd/system/zfs-zed.service.in @@ -6,7 +6,7 @@ ConditionPathIsDirectory=/sys/module/zfs [Service] EnvironmentFile=-@initconfdir@/zfs ExecStart=@sbindir@/zed -F -Restart=on-abort +Restart=always [Install] Alias=zed.service diff --git a/sys/contrib/openzfs/include/Makefile.am b/sys/contrib/openzfs/include/Makefile.am index 19726bba1864..1e5c71150eeb 100644 --- a/sys/contrib/openzfs/include/Makefile.am +++ b/sys/contrib/openzfs/include/Makefile.am @@ -20,6 +20,7 @@ COMMON_H = \ sys/aggsum.h \ sys/arc.h \ sys/arc_impl.h \ + sys/asm_linkage.h \ sys/avl.h \ sys/avl_impl.h \ sys/bitops.h \ diff --git a/sys/contrib/openzfs/include/libzfs.h b/sys/contrib/openzfs/include/libzfs.h index 2806d1f7cff5..05b4dfe35c76 100644 --- a/sys/contrib/openzfs/include/libzfs.h +++ b/sys/contrib/openzfs/include/libzfs.h @@ -646,19 +646,27 @@ _LIBZFS_H void zprop_print_one_property(const char *, zprop_get_cbdata_t *, /* * Iterator functions. 
*/ +#define ZFS_ITER_RECURSE (1 << 0) +#define ZFS_ITER_ARGS_CAN_BE_PATHS (1 << 1) +#define ZFS_ITER_PROP_LISTSNAPS (1 << 2) +#define ZFS_ITER_DEPTH_LIMIT (1 << 3) +#define ZFS_ITER_RECVD_PROPS (1 << 4) +#define ZFS_ITER_LITERAL_PROPS (1 << 5) +#define ZFS_ITER_SIMPLE (1 << 6) + typedef int (*zfs_iter_f)(zfs_handle_t *, void *); _LIBZFS_H int zfs_iter_root(libzfs_handle_t *, zfs_iter_f, void *); -_LIBZFS_H int zfs_iter_children(zfs_handle_t *, zfs_iter_f, void *); -_LIBZFS_H int zfs_iter_dependents(zfs_handle_t *, boolean_t, zfs_iter_f, +_LIBZFS_H int zfs_iter_children(zfs_handle_t *, int, zfs_iter_f, void *); +_LIBZFS_H int zfs_iter_dependents(zfs_handle_t *, int, boolean_t, zfs_iter_f, void *); -_LIBZFS_H int zfs_iter_filesystems(zfs_handle_t *, zfs_iter_f, void *); -_LIBZFS_H int zfs_iter_snapshots(zfs_handle_t *, boolean_t, zfs_iter_f, void *, +_LIBZFS_H int zfs_iter_filesystems(zfs_handle_t *, int, zfs_iter_f, void *); +_LIBZFS_H int zfs_iter_snapshots(zfs_handle_t *, int, zfs_iter_f, void *, uint64_t, uint64_t); -_LIBZFS_H int zfs_iter_snapshots_sorted(zfs_handle_t *, zfs_iter_f, void *, +_LIBZFS_H int zfs_iter_snapshots_sorted(zfs_handle_t *, int, zfs_iter_f, void *, uint64_t, uint64_t); -_LIBZFS_H int zfs_iter_snapspec(zfs_handle_t *, const char *, zfs_iter_f, +_LIBZFS_H int zfs_iter_snapspec(zfs_handle_t *, int, const char *, zfs_iter_f, void *); -_LIBZFS_H int zfs_iter_bookmarks(zfs_handle_t *, zfs_iter_f, void *); +_LIBZFS_H int zfs_iter_bookmarks(zfs_handle_t *, int, zfs_iter_f, void *); _LIBZFS_H int zfs_iter_mounted(zfs_handle_t *, zfs_iter_f, void *); typedef struct get_all_cb { @@ -732,6 +740,9 @@ typedef struct sendflags { /* show progress (ie. -v) */ boolean_t progress; + /* show progress as process title (ie. 
-V) */ + boolean_t progressastitle; + /* large blocks (>128K) are permitted */ boolean_t largeblock; diff --git a/sys/contrib/openzfs/include/libzutil.h b/sys/contrib/openzfs/include/libzutil.h index 617dd0cd1715..4d4bddaad5f3 100644 --- a/sys/contrib/openzfs/include/libzutil.h +++ b/sys/contrib/openzfs/include/libzutil.h @@ -170,7 +170,9 @@ struct zfs_cmd; * List of colors to use */ #define ANSI_RED "\033[0;31m" +#define ANSI_GREEN "\033[0;32m" #define ANSI_YELLOW "\033[0;33m" +#define ANSI_BLUE "\033[0;34m" #define ANSI_RESET "\033[0m" #define ANSI_BOLD "\033[1m" @@ -180,6 +182,13 @@ _LIBZUTIL_H int printf_color(const char *color, const char *format, ...); _LIBZUTIL_H const char *zfs_basename(const char *path); _LIBZUTIL_H ssize_t zfs_dirnamelen(const char *path); +#ifdef __linux__ +_LIBZUTIL_H void zfs_setproctitle_init(int argc, char *argv[], char *envp[]); +_LIBZUTIL_H void zfs_setproctitle(const char *fmt, ...); +#else +#define zfs_setproctitle(fmt, ...) setproctitle(fmt, ##__VA_ARGS__) +#define zfs_setproctitle_init(x, y, z) ((void)0) +#endif /* * These functions are used by the ZFS libraries and cmd/zpool code, but are diff --git a/sys/contrib/openzfs/include/os/freebsd/Makefile.am b/sys/contrib/openzfs/include/os/freebsd/Makefile.am index a750f52e7d25..89d4ef564d5f 100644 --- a/sys/contrib/openzfs/include/os/freebsd/Makefile.am +++ b/sys/contrib/openzfs/include/os/freebsd/Makefile.am @@ -6,6 +6,8 @@ noinst_HEADERS = \ \ %D%/spl/rpc/xdr.h \ \ + %D%/spl/sys/ia32/asm_linkage.h \ + \ %D%/spl/sys/acl.h \ %D%/spl/sys/acl_impl.h \ %D%/spl/sys/atomic.h \ @@ -88,3 +90,4 @@ noinst_HEADERS = \ %D%/zfs/sys/zfs_vnops_os.h \ %D%/zfs/sys/zfs_znode_impl.h \ %D%/zfs/sys/zpl.h + diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/ia32/asm_linkage.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/ia32/asm_linkage.h new file mode 100644 index 000000000000..058d600007af --- /dev/null +++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/ia32/asm_linkage.h @@ -0,0 
+1,178 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _IA32_SYS_ASM_LINKAGE_H +#define _IA32_SYS_ASM_LINKAGE_H + +#define RET ret + +/* Tell compiler to call assembler like Unix */ +#undef ASMABI +#define ASMABI __attribute__((sysv_abi)) + +#define ENDBR + +#define SECTION_TEXT .text +#define SECTION_STATIC .data + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _ASM /* The remainder of this file is only for assembly files */ + + +/* + * make annoying differences in assembler syntax go away + */ + +/* + * D16 and A16 are used to insert instructions prefixes; the + * macros help the assembler code be slightly more portable. + */ +#if !defined(__GNUC_AS__) +/* + * /usr/ccs/bin/as prefixes are parsed as separate instructions + */ +#define D16 data16; +#define A16 addr16; + +/* + * (There are some weird constructs in constant expressions) + */ +#define _CONST(const) [const] +#define _BITNOT(const) -1!_CONST(const) +#define _MUL(a, b) _CONST(a \* b) + +#else +/* + * Why not use the 'data16' and 'addr16' prefixes .. 
well, the + * assembler doesn't quite believe in real mode, and thus argues with + * us about what we're trying to do. + */ +#define D16 .byte 0x66; +#define A16 .byte 0x67; + +#define _CONST(const) (const) +#define _BITNOT(const) ~_CONST(const) +#define _MUL(a, b) _CONST(a * b) + +#endif + +/* + * C pointers are different sizes between i386 and amd64. + * These constants can be used to compute offsets into pointer arrays. + */ +#if defined(__amd64) +#define CLONGSHIFT 3 +#define CLONGSIZE 8 +#define CLONGMASK 7 +#elif defined(__i386) +#define CLONGSHIFT 2 +#define CLONGSIZE 4 +#define CLONGMASK 3 +#endif + +/* + * Since we know we're either ILP32 or LP64 .. + */ +#define CPTRSHIFT CLONGSHIFT +#define CPTRSIZE CLONGSIZE +#define CPTRMASK CLONGMASK + +#if CPTRSIZE != (1 << CPTRSHIFT) || CLONGSIZE != (1 << CLONGSHIFT) +#error "inconsistent shift constants" +#endif + +#if CPTRMASK != (CPTRSIZE - 1) || CLONGMASK != (CLONGSIZE - 1) +#error "inconsistent mask constants" +#endif + +#define ASM_ENTRY_ALIGN 16 + +/* + * SSE register alignment and save areas + */ + +#define XMM_SIZE 16 +#define XMM_ALIGN 16 + +/* + * ENTRY provides the standard procedure entry code and an easy way to + * insert the calls to mcount for profiling. ENTRY_NP is identical, but + * never calls mcount. + */ +#define ENTRY(x) \ + .text; \ + .balign ASM_ENTRY_ALIGN; \ + .globl x; \ +x: MCOUNT(x) + +#define ENTRY_NP(x) \ + .text; \ + .balign ASM_ENTRY_ALIGN; \ + .globl x; \ +x: + +#define ENTRY_ALIGN(x, a) \ + .text; \ + .balign a; \ + .globl x; \ +x: + +/* + * ENTRY2 is identical to ENTRY but provides two labels for the entry point. + */ +#define ENTRY2(x, y) \ + .text; \ + .balign ASM_ENTRY_ALIGN; \ + .globl x, y; \ +x:; \ +y: MCOUNT(x) + +#define ENTRY_NP2(x, y) \ + .text; \ + .balign ASM_ENTRY_ALIGN; \ + .globl x, y; \ +x:; \ +y: + + +/* + * SET_SIZE trails a function and set the size for the ELF symbol table. 
+ */ +#define SET_SIZE(x) + +#define SET_OBJ(x) + + +#endif /* _ASM */ + +#ifdef __cplusplus +} +#endif + +#endif /* _IA32_SYS_ASM_LINKAGE_H */ diff --git a/sys/contrib/openzfs/include/os/linux/Makefile.am b/sys/contrib/openzfs/include/os/linux/Makefile.am index 13ba8060c62d..e20702d332ac 100644 --- a/sys/contrib/openzfs/include/os/linux/Makefile.am +++ b/sys/contrib/openzfs/include/os/linux/Makefile.am @@ -109,4 +109,8 @@ kernel_spl_sys_HEADERS = \ %D%/spl/sys/wmsum.h \ %D%/spl/sys/zmod.h \ %D%/spl/sys/zone.h + +kernel_spl_ia32dir = $(kernel_spl_sysdir)/ia32 +kernel_spl_ia32_HEADERS = \ + %D%/spl/sys/ia32/asm_linkage.h endif diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/blkdev_compat.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/blkdev_compat.h index 45de1f4993f1..f04eb5b2593f 100644 --- a/sys/contrib/openzfs/include/os/linux/kernel/linux/blkdev_compat.h +++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/blkdev_compat.h @@ -394,7 +394,11 @@ vdev_lookup_bdev(const char *path, dev_t *dev) static inline void bio_set_op_attrs(struct bio *bio, unsigned rw, unsigned flags) { +#if defined(HAVE_BIO_BI_OPF) + bio->bi_opf = rw | flags; +#else bio->bi_rw |= rw | flags; +#endif /* HAVE_BIO_BI_OPF */ } #endif diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h index 4de1118daafa..c43e5b142fd6 100644 --- a/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h +++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h @@ -35,6 +35,10 @@ #define d_make_root(inode) d_alloc_root(inode) #endif /* HAVE_D_MAKE_ROOT */ +#ifdef HAVE_DENTRY_D_U_ALIASES +#define d_alias d_u.d_alias +#endif + /* * 2.6.30 API change, * The const keyword was added to the 'struct dentry_operations' in @@ -70,11 +74,7 @@ zpl_d_drop_aliases(struct inode *inode) { struct dentry *dentry; spin_lock(&inode->i_lock); -#ifdef HAVE_DENTRY_D_U_ALIASES - 
hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { -#else hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { -#endif if (!IS_ROOT(dentry) && !d_mountpoint(dentry) && (dentry->d_inode == inode)) { d_drop(dentry); diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_powerpc.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_powerpc.h index 2a2f92bc499d..f1de3ad01656 100644 --- a/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_powerpc.h +++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_powerpc.h @@ -69,6 +69,7 @@ #define kfpu_allowed() 1 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0) +#ifdef CONFIG_SPE #define kfpu_begin() \ { \ preempt_disable(); \ @@ -83,6 +84,20 @@ disable_kernel_altivec(); \ preempt_enable(); \ } +#else /* CONFIG_SPE */ +#define kfpu_begin() \ + { \ + preempt_disable(); \ + enable_kernel_altivec(); \ + enable_kernel_vsx(); \ + } +#define kfpu_end() \ + { \ + disable_kernel_vsx(); \ + disable_kernel_altivec(); \ + preempt_enable(); \ + } +#endif #else /* seems that before 4.5 no-one bothered */ #define kfpu_begin() diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/ia32/asm_linkage.h b/sys/contrib/openzfs/include/os/linux/spl/sys/ia32/asm_linkage.h new file mode 100644 index 000000000000..3aaa4af5dab8 --- /dev/null +++ b/sys/contrib/openzfs/include/os/linux/spl/sys/ia32/asm_linkage.h @@ -0,0 +1,212 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _IA32_SYS_ASM_LINKAGE_H +#define _IA32_SYS_ASM_LINKAGE_H + +#if defined(_KERNEL) && defined(__linux__) +#include <linux/linkage.h> +#endif + +#ifndef ENDBR +#if defined(__ELF__) && defined(__CET__) && defined(__has_include) +/* CSTYLED */ +#if __has_include(<cet.h>) + +#include <cet.h> + +#ifdef _CET_ENDBR +#define ENDBR _CET_ENDBR +#endif /* _CET_ENDBR */ + +#endif /* <cet.h> */ +#endif /* __ELF__ && __CET__ && __has_include */ +#endif /* !ENDBR */ + +#ifndef ENDBR +#define ENDBR +#endif +#ifndef RET +#define RET ret +#endif + +/* You can set to nothing on Unix platforms */ +#undef ASMABI +#define ASMABI __attribute__((sysv_abi)) + +#define SECTION_TEXT .text +#define SECTION_STATIC .section .rodata + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _ASM /* The remainder of this file is only for assembly files */ + +/* + * make annoying differences in assembler syntax go away + */ + +/* + * D16 and A16 are used to insert instructions prefixes; the + * macros help the assembler code be slightly more portable. + */ +#if !defined(__GNUC_AS__) +/* + * /usr/ccs/bin/as prefixes are parsed as separate instructions + */ +#define D16 data16; +#define A16 addr16; + +/* + * (There are some weird constructs in constant expressions) + */ +#define _CONST(const) [const] +#define _BITNOT(const) -1!_CONST(const) +#define _MUL(a, b) _CONST(a \* b) + +#else +/* + * Why not use the 'data16' and 'addr16' prefixes .. 
well, the + * assembler doesn't quite believe in real mode, and thus argues with + * us about what we're trying to do. + */ +#define D16 .byte 0x66; +#define A16 .byte 0x67; + +#define _CONST(const) (const) +#define _BITNOT(const) ~_CONST(const) +#define _MUL(a, b) _CONST(a * b) + +#endif + +/* + * C pointers are different sizes between i386 and amd64. + * These constants can be used to compute offsets into pointer arrays. + */ +#if defined(__amd64) +#define CLONGSHIFT 3 +#define CLONGSIZE 8 +#define CLONGMASK 7 +#elif defined(__i386) +#define CLONGSHIFT 2 +#define CLONGSIZE 4 +#define CLONGMASK 3 +#endif + +/* + * Since we know we're either ILP32 or LP64 .. + */ +#define CPTRSHIFT CLONGSHIFT +#define CPTRSIZE CLONGSIZE +#define CPTRMASK CLONGMASK + +#if CPTRSIZE != (1 << CPTRSHIFT) || CLONGSIZE != (1 << CLONGSHIFT) +#error "inconsistent shift constants" +#endif + +#if CPTRMASK != (CPTRSIZE - 1) || CLONGMASK != (CLONGSIZE - 1) +#error "inconsistent mask constants" +#endif + +#define ASM_ENTRY_ALIGN 16 + +/* + * SSE register alignment and save areas + */ + +#define XMM_SIZE 16 +#define XMM_ALIGN 16 + +/* + * ENTRY provides the standard procedure entry code and an easy way to + * insert the calls to mcount for profiling. ENTRY_NP is identical, but + * never calls mcount. + */ +#undef ENTRY +#define ENTRY(x) \ + .text; \ + .balign ASM_ENTRY_ALIGN; \ + .globl x; \ + .type x, @function; \ +x: MCOUNT(x) + +#define ENTRY_NP(x) \ + .text; \ + .balign ASM_ENTRY_ALIGN; \ + .globl x; \ + .type x, @function; \ +x: + +#define ENTRY_ALIGN(x, a) \ + .text; \ + .balign a; \ + .globl x; \ + .type x, @function; \ +x: + +#define FUNCTION(x) \ + .type x, @function; \ +x: + +/* + * ENTRY2 is identical to ENTRY but provides two labels for the entry point. 
+ */ +#define ENTRY2(x, y) \ + .text; \ + .balign ASM_ENTRY_ALIGN; \ + .globl x, y; \ + .type x, @function; \ + .type y, @function; \ +x:; \ +y: MCOUNT(x) + +#define ENTRY_NP2(x, y) \ + .text; \ + .balign ASM_ENTRY_ALIGN; \ + .globl x, y; \ + .type x, @function; \ + .type y, @function; \ +x:; \ +y: + + +/* + * SET_SIZE trails a function and set the size for the ELF symbol table. + */ +#define SET_SIZE(x) \ + .size x, [.-x] + +#define SET_OBJ(x) .type x, @object + + +#endif /* _ASM */ + +#ifdef __cplusplus +} +#endif + +#endif /* _IA32_SYS_ASM_LINKAGE_H */ diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/kmem.h b/sys/contrib/openzfs/include/os/linux/spl/sys/kmem.h index 111924303e16..594425f7b297 100644 --- a/sys/contrib/openzfs/include/os/linux/spl/sys/kmem.h +++ b/sys/contrib/openzfs/include/os/linux/spl/sys/kmem.h @@ -40,6 +40,9 @@ extern void kmem_strfree(char *str); #define kmem_scnprintf scnprintf +#define POINTER_IS_VALID(p) (!((uintptr_t)(p) & 0x3)) +#define POINTER_INVALIDATE(pp) (*(pp) = (void *)((uintptr_t)(*(pp)) | 0x1)) + /* * Memory allocation interfaces */ diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/kmem_cache.h b/sys/contrib/openzfs/include/os/linux/spl/sys/kmem_cache.h index bd0ad5052d3d..cc9cafa84f99 100644 --- a/sys/contrib/openzfs/include/os/linux/spl/sys/kmem_cache.h +++ b/sys/contrib/openzfs/include/os/linux/spl/sys/kmem_cache.h @@ -98,9 +98,6 @@ extern struct rw_semaphore spl_kmem_cache_sem; #define SPL_MAX_KMEM_ORDER_NR_PAGES (KMALLOC_MAX_SIZE >> PAGE_SHIFT) #endif -#define POINTER_IS_VALID(p) 0 /* Unimplemented */ -#define POINTER_INVALIDATE(pp) /* Unimplemented */ - typedef int (*spl_kmem_ctor_t)(void *, void *, int); typedef void (*spl_kmem_dtor_t)(void *, void *); diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_arc.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_arc.h index d8e73337622c..c494f48bb48b 100644 --- a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_arc.h +++ 
b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_arc.h @@ -103,12 +103,13 @@ DEFINE_EVENT(zfs_arc_buf_hdr_class, name, \ TP_PROTO(arc_buf_hdr_t *ab), \ TP_ARGS(ab)) DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__hit); +DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__iohit); DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__evict); DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__delete); DEFINE_ARC_BUF_HDR_EVENT(zfs_new_state__mru); DEFINE_ARC_BUF_HDR_EVENT(zfs_new_state__mfu); +DEFINE_ARC_BUF_HDR_EVENT(zfs_new_state__uncached); DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__async__upgrade__sync); -DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__demand__hit__predictive__prefetch); DEFINE_ARC_BUF_HDR_EVENT(zfs_l2arc__hit); DEFINE_ARC_BUF_HDR_EVENT(zfs_l2arc__miss); @@ -387,12 +388,13 @@ DEFINE_ARC_WAIT_FOR_EVICTION_EVENT(zfs_arc__wait__for__eviction); #else DEFINE_DTRACE_PROBE1(arc__hit); +DEFINE_DTRACE_PROBE1(arc__iohit); DEFINE_DTRACE_PROBE1(arc__evict); DEFINE_DTRACE_PROBE1(arc__delete); DEFINE_DTRACE_PROBE1(new_state__mru); DEFINE_DTRACE_PROBE1(new_state__mfu); +DEFINE_DTRACE_PROBE1(new_state__uncached); DEFINE_DTRACE_PROBE1(arc__async__upgrade__sync); -DEFINE_DTRACE_PROBE1(arc__demand__hit__predictive__prefetch); DEFINE_DTRACE_PROBE1(l2arc__hit); DEFINE_DTRACE_PROBE1(l2arc__miss); DEFINE_DTRACE_PROBE2(l2arc__read); diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h index c3ee0ae4a600..ac1f01a86c41 100644 --- a/sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h +++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h @@ -71,11 +71,14 @@ extern int zpl_xattr_security_init(struct inode *ip, struct inode *dip, #if defined(HAVE_SET_ACL_USERNS) extern int zpl_set_acl(struct user_namespace *userns, struct inode *ip, struct posix_acl *acl, int type); +#elif defined(HAVE_SET_ACL_USERNS_DENTRY_ARG2) +extern int zpl_set_acl(struct user_namespace *userns, struct dentry *dentry, + struct posix_acl *acl, int type); #else extern int zpl_set_acl(struct inode *ip, struct posix_acl *acl, int 
type); #endif /* HAVE_SET_ACL_USERNS */ #endif /* HAVE_SET_ACL */ -#if defined(HAVE_GET_ACL_RCU) +#if defined(HAVE_GET_ACL_RCU) || defined(HAVE_GET_INODE_ACL) extern struct posix_acl *zpl_get_acl(struct inode *ip, int type, bool rcu); #elif defined(HAVE_GET_ACL) extern struct posix_acl *zpl_get_acl(struct inode *ip, int type); diff --git a/sys/contrib/openzfs/include/sys/arc.h b/sys/contrib/openzfs/include/sys/arc.h index 532a2fe4bc03..2b4f16ee0a86 100644 --- a/sys/contrib/openzfs/include/sys/arc.h +++ b/sys/contrib/openzfs/include/sys/arc.h @@ -115,7 +115,7 @@ typedef enum arc_flags ARC_FLAG_PREFETCH = 1 << 2, /* I/O is a prefetch */ ARC_FLAG_CACHED = 1 << 3, /* I/O was in cache */ ARC_FLAG_L2CACHE = 1 << 4, /* cache in L2ARC */ - ARC_FLAG_PREDICTIVE_PREFETCH = 1 << 5, /* I/O from zfetch */ + ARC_FLAG_UNCACHED = 1 << 5, /* evict after use */ ARC_FLAG_PRESCIENT_PREFETCH = 1 << 6, /* long min lifespan */ /* @@ -195,7 +195,6 @@ typedef enum arc_buf_flags { struct arc_buf { arc_buf_hdr_t *b_hdr; arc_buf_t *b_next; - kmutex_t b_evict_lock; void *b_data; arc_buf_flags_t b_flags; }; @@ -229,6 +228,7 @@ typedef enum arc_state_type { ARC_STATE_MFU, ARC_STATE_MFU_GHOST, ARC_STATE_L2C_ONLY, + ARC_STATE_UNCACHED, ARC_STATE_NUMTYPES } arc_state_type_t; @@ -302,8 +302,8 @@ int arc_referenced(arc_buf_t *buf); int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_read_done_func_t *done, void *priv, zio_priority_t priority, int flags, arc_flags_t *arc_flags, const zbookmark_phys_t *zb); -zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg, - blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, const zio_prop_t *zp, +zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, + arc_buf_t *buf, boolean_t uncached, boolean_t l2arc, const zio_prop_t *zp, arc_write_done_func_t *ready, arc_write_done_func_t *child_ready, arc_write_done_func_t *physdone, arc_write_done_func_t *done, void *priv, zio_priority_t priority, int zio_flags, diff --git 
a/sys/contrib/openzfs/include/sys/arc_impl.h b/sys/contrib/openzfs/include/sys/arc_impl.h index 03eebafa9952..082372729b80 100644 --- a/sys/contrib/openzfs/include/sys/arc_impl.h +++ b/sys/contrib/openzfs/include/sys/arc_impl.h @@ -46,6 +46,7 @@ extern "C" { * ARC_mru_ghost - recently used, no longer in cache * ARC_mfu - frequently used, currently cached * ARC_mfu_ghost - frequently used, no longer in cache + * ARC_uncached - uncacheable prefetch, to be evicted * ARC_l2c_only - exists in L2ARC but not other states * When there are no active references to the buffer, they are * are linked onto a list in one of these arc states. These are @@ -101,9 +102,14 @@ struct arc_callback { boolean_t acb_compressed; boolean_t acb_noauth; boolean_t acb_nobuf; + boolean_t acb_wait; + int acb_wait_error; + kmutex_t acb_wait_lock; + kcondvar_t acb_wait_cv; zbookmark_phys_t acb_zb; zio_t *acb_zio_dummy; zio_t *acb_zio_head; + arc_callback_t *acb_prev; arc_callback_t *acb_next; }; @@ -150,9 +156,6 @@ struct arc_write_callback { * these two allocation states. */ typedef struct l1arc_buf_hdr { - kmutex_t b_freeze_lock; - zio_cksum_t *b_freeze_cksum; - /* for waiting on reads to complete */ kcondvar_t b_cv; uint8_t b_byteswap; @@ -175,6 +178,11 @@ typedef struct l1arc_buf_hdr { arc_callback_t *b_acb; abd_t *b_pabd; + +#ifdef ZFS_DEBUG + zio_cksum_t *b_freeze_cksum; + kmutex_t b_freeze_lock; +#endif } l1arc_buf_hdr_t; typedef enum l2arc_dev_hdr_flags_t { @@ -511,20 +519,33 @@ struct arc_buf_hdr { }; typedef struct arc_stats { + /* Number of requests that were satisfied without I/O. */ kstat_named_t arcstat_hits; + /* Number of requests for which I/O was already running. */ + kstat_named_t arcstat_iohits; + /* Number of requests for which I/O has to be issued. */ kstat_named_t arcstat_misses; + /* Same three, but specifically for demand data. 
*/ kstat_named_t arcstat_demand_data_hits; + kstat_named_t arcstat_demand_data_iohits; kstat_named_t arcstat_demand_data_misses; + /* Same three, but specifically for demand metadata. */ kstat_named_t arcstat_demand_metadata_hits; + kstat_named_t arcstat_demand_metadata_iohits; kstat_named_t arcstat_demand_metadata_misses; + /* Same three, but specifically for prefetch data. */ kstat_named_t arcstat_prefetch_data_hits; + kstat_named_t arcstat_prefetch_data_iohits; kstat_named_t arcstat_prefetch_data_misses; + /* Same three, but specifically for prefetch metadata. */ kstat_named_t arcstat_prefetch_metadata_hits; + kstat_named_t arcstat_prefetch_metadata_iohits; kstat_named_t arcstat_prefetch_metadata_misses; kstat_named_t arcstat_mru_hits; kstat_named_t arcstat_mru_ghost_hits; kstat_named_t arcstat_mfu_hits; kstat_named_t arcstat_mfu_ghost_hits; + kstat_named_t arcstat_uncached_hits; kstat_named_t arcstat_deleted; /* * Number of buffers that could not be evicted because the hash lock @@ -727,6 +748,21 @@ typedef struct arc_stats { * ARC_BUFC_METADATA, and linked off the arc_mru_ghost state. */ kstat_named_t arcstat_mfu_ghost_evictable_metadata; + /* + * Total number of bytes that are going to be evicted from ARC due to + * ARC_FLAG_UNCACHED being set. + */ + kstat_named_t arcstat_uncached_size; + /* + * Number of data bytes that are going to be evicted from ARC due to + * ARC_FLAG_UNCACHED being set. + */ + kstat_named_t arcstat_uncached_evictable_data; + /* + * Number of metadata bytes that are going to be evicted from ARC + * due to ARC_FLAG_UNCACHED being set. + */ + kstat_named_t arcstat_uncached_evictable_metadata; kstat_named_t arcstat_l2_hits; kstat_named_t arcstat_l2_misses; /* @@ -844,8 +880,18 @@ typedef struct arc_stats { kstat_named_t arcstat_meta_max; kstat_named_t arcstat_meta_min; kstat_named_t arcstat_async_upgrade_sync; + /* Number of predictive prefetch requests. 
*/ + kstat_named_t arcstat_predictive_prefetch; + /* Number of requests for which predictive prefetch has completed. */ kstat_named_t arcstat_demand_hit_predictive_prefetch; + /* Number of requests for which predictive prefetch was running. */ + kstat_named_t arcstat_demand_iohit_predictive_prefetch; + /* Number of prescient prefetch requests. */ + kstat_named_t arcstat_prescient_prefetch; + /* Number of requests for which prescient prefetch has completed. */ kstat_named_t arcstat_demand_hit_prescient_prefetch; + /* Number of requests for which prescient prefetch was running. */ + kstat_named_t arcstat_demand_iohit_prescient_prefetch; kstat_named_t arcstat_need_free; kstat_named_t arcstat_sys_free; kstat_named_t arcstat_raw_size; @@ -855,19 +901,25 @@ typedef struct arc_stats { typedef struct arc_sums { wmsum_t arcstat_hits; + wmsum_t arcstat_iohits; wmsum_t arcstat_misses; wmsum_t arcstat_demand_data_hits; + wmsum_t arcstat_demand_data_iohits; wmsum_t arcstat_demand_data_misses; wmsum_t arcstat_demand_metadata_hits; + wmsum_t arcstat_demand_metadata_iohits; wmsum_t arcstat_demand_metadata_misses; wmsum_t arcstat_prefetch_data_hits; + wmsum_t arcstat_prefetch_data_iohits; wmsum_t arcstat_prefetch_data_misses; wmsum_t arcstat_prefetch_metadata_hits; + wmsum_t arcstat_prefetch_metadata_iohits; wmsum_t arcstat_prefetch_metadata_misses; wmsum_t arcstat_mru_hits; wmsum_t arcstat_mru_ghost_hits; wmsum_t arcstat_mfu_hits; wmsum_t arcstat_mfu_ghost_hits; + wmsum_t arcstat_uncached_hits; wmsum_t arcstat_deleted; wmsum_t arcstat_mutex_miss; wmsum_t arcstat_access_skip; @@ -936,8 +988,12 @@ typedef struct arc_sums { wmsum_t arcstat_prune; aggsum_t arcstat_meta_used; wmsum_t arcstat_async_upgrade_sync; + wmsum_t arcstat_predictive_prefetch; wmsum_t arcstat_demand_hit_predictive_prefetch; + wmsum_t arcstat_demand_iohit_predictive_prefetch; + wmsum_t arcstat_prescient_prefetch; wmsum_t arcstat_demand_hit_prescient_prefetch; + wmsum_t arcstat_demand_iohit_prescient_prefetch; 
wmsum_t arcstat_raw_size; wmsum_t arcstat_cached_only_in_progress; wmsum_t arcstat_abd_chunk_waste_size; @@ -970,6 +1026,7 @@ typedef struct arc_evict_waiter { #define arc_mfu (&ARC_mfu) #define arc_mfu_ghost (&ARC_mfu_ghost) #define arc_l2c_only (&ARC_l2c_only) +#define arc_uncached (&ARC_uncached) extern taskq_t *arc_prune_taskq; extern arc_stats_t arc_stats; diff --git a/sys/contrib/openzfs/include/sys/asm_linkage.h b/sys/contrib/openzfs/include/sys/asm_linkage.h new file mode 100644 index 000000000000..749157d4c3db --- /dev/null +++ b/sys/contrib/openzfs/include/sys/asm_linkage.h @@ -0,0 +1,48 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_ASM_LINKAGE_H +#define _SYS_ASM_LINKAGE_H + +#define ASMABI + +#if defined(__i386) || defined(__amd64) + +#include <sys/ia32/asm_linkage.h> /* XX64 x86/sys/asm_linkage.h */ + +#endif + +#if defined(_KERNEL) && defined(HAVE_KERNEL_OBJTOOL) + +#include <asm/frame.h> + +#else /* userspace */ +#define FRAME_BEGIN +#define FRAME_END +#endif + + +#endif /* _SYS_ASM_LINKAGE_H */ diff --git a/sys/contrib/openzfs/include/sys/bqueue.h b/sys/contrib/openzfs/include/sys/bqueue.h index b9621966027a..edcee16227ec 100644 --- a/sys/contrib/openzfs/include/sys/bqueue.h +++ b/sys/contrib/openzfs/include/sys/bqueue.h @@ -27,10 +27,14 @@ extern "C" { typedef struct bqueue { list_t bq_list; + size_t bq_size; + list_t bq_dequeuing_list; + size_t bq_dequeuing_size; + list_t bq_enqueuing_list; + size_t bq_enqueuing_size; kmutex_t bq_lock; kcondvar_t bq_add_cv; kcondvar_t bq_pop_cv; - size_t bq_size; size_t bq_maxsize; uint_t bq_fill_fraction; size_t bq_node_offset; @@ -47,7 +51,6 @@ void bqueue_destroy(bqueue_t *); void bqueue_enqueue(bqueue_t *, void *, size_t); void bqueue_enqueue_flush(bqueue_t *, void *, size_t); void *bqueue_dequeue(bqueue_t *); -boolean_t bqueue_empty(bqueue_t *); #ifdef __cplusplus } diff --git a/sys/contrib/openzfs/include/sys/dbuf.h b/sys/contrib/openzfs/include/sys/dbuf.h index 9ba46f0d725f..a1ce76b1c763 100644 --- a/sys/contrib/openzfs/include/sys/dbuf.h +++ b/sys/contrib/openzfs/include/sys/dbuf.h @@ -55,6 +55,8 @@ extern "C" { #define DB_RF_NEVERWAIT (1 << 4) #define DB_RF_CACHED (1 << 5) #define DB_RF_NO_DECRYPT (1 << 6) +#define DB_RF_PARTIAL_FIRST (1 << 7) +#define DB_RF_PARTIAL_MORE (1 << 8) /* * The simplified state transition diagram for dbufs looks like: @@ -294,6 +296,8 @@ typedef struct dmu_buf_impl { /* Tells us which dbuf cache this dbuf is in, if any */ dbuf_cached_state_t db_caching_status; + uint64_t db_hash; + /* Data which is unique to data (leaf) blocks: */ /* User callback information. 
*/ @@ -319,6 +323,9 @@ typedef struct dmu_buf_impl { uint8_t db_pending_evict; uint8_t db_dirtycnt; + + /* The buffer was partially read. More reads may follow. */ + uint8_t db_partial_read; } dmu_buf_impl_t; #define DBUF_HASH_MUTEX(h, idx) \ @@ -364,7 +371,7 @@ void dbuf_rele_and_unlock(dmu_buf_impl_t *db, const void *tag, boolean_t evicting); dmu_buf_impl_t *dbuf_find(struct objset *os, uint64_t object, uint8_t level, - uint64_t blkid); + uint64_t blkid, uint64_t *hash_out); int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags); void dmu_buf_will_not_fill(dmu_buf_t *db, dmu_tx_t *tx); diff --git a/sys/contrib/openzfs/include/sys/dnode.h b/sys/contrib/openzfs/include/sys/dnode.h index 9553988e7ddd..2d741ea36bd0 100644 --- a/sys/contrib/openzfs/include/sys/dnode.h +++ b/sys/contrib/openzfs/include/sys/dnode.h @@ -457,15 +457,11 @@ void dnode_free_interior_slots(dnode_t *dn); #define DNODE_IS_DIRTY(_dn) \ ((_dn)->dn_dirty_txg >= spa_syncing_txg((_dn)->dn_objset->os_spa)) -#define DNODE_IS_CACHEABLE(_dn) \ +#define DNODE_LEVEL_IS_CACHEABLE(_dn, _level) \ ((_dn)->dn_objset->os_primary_cache == ZFS_CACHE_ALL || \ - (DMU_OT_IS_METADATA((_dn)->dn_type) && \ + (((_level) > 0 || DMU_OT_IS_METADATA((_dn)->dn_type)) && \ (_dn)->dn_objset->os_primary_cache == ZFS_CACHE_METADATA)) -#define DNODE_META_IS_CACHEABLE(_dn) \ - ((_dn)->dn_objset->os_primary_cache == ZFS_CACHE_ALL || \ - (_dn)->dn_objset->os_primary_cache == ZFS_CACHE_METADATA) - /* * Used for dnodestats kstat. 
*/ diff --git a/sys/contrib/openzfs/include/sys/dsl_dir.h b/sys/contrib/openzfs/include/sys/dsl_dir.h index 384f98e8f722..f7c0d9acd10d 100644 --- a/sys/contrib/openzfs/include/sys/dsl_dir.h +++ b/sys/contrib/openzfs/include/sys/dsl_dir.h @@ -116,7 +116,7 @@ struct dsl_dir { /* gross estimate of space used by in-flight tx's */ uint64_t dd_tempreserved[TXG_SIZE]; /* amount of space we expect to write; == amount of dirty data */ - int64_t dd_space_towrite[TXG_SIZE]; + uint64_t dd_space_towrite[TXG_SIZE]; dsl_deadlist_t dd_livelist; bplist_t dd_pending_frees; diff --git a/sys/contrib/openzfs/include/sys/fm/fs/zfs.h b/sys/contrib/openzfs/include/sys/fm/fs/zfs.h index 97cb14aee36a..b9bac7e252e5 100644 --- a/sys/contrib/openzfs/include/sys/fm/fs/zfs.h +++ b/sys/contrib/openzfs/include/sys/fm/fs/zfs.h @@ -78,6 +78,10 @@ extern "C" { #define FM_EREPORT_PAYLOAD_ZFS_VDEV_READ_ERRORS "vdev_read_errors" #define FM_EREPORT_PAYLOAD_ZFS_VDEV_WRITE_ERRORS "vdev_write_errors" #define FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_ERRORS "vdev_cksum_errors" +#define FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_N "vdev_cksum_n" +#define FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_T "vdev_cksum_t" +#define FM_EREPORT_PAYLOAD_ZFS_VDEV_IO_N "vdev_io_n" +#define FM_EREPORT_PAYLOAD_ZFS_VDEV_IO_T "vdev_io_t" #define FM_EREPORT_PAYLOAD_ZFS_VDEV_DELAYS "vdev_delays" #define FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID "parent_guid" #define FM_EREPORT_PAYLOAD_ZFS_PARENT_TYPE "parent_type" diff --git a/sys/contrib/openzfs/include/sys/fs/zfs.h b/sys/contrib/openzfs/include/sys/fs/zfs.h index 1124604e8c68..da2d0521655c 100644 --- a/sys/contrib/openzfs/include/sys/fs/zfs.h +++ b/sys/contrib/openzfs/include/sys/fs/zfs.h @@ -356,6 +356,10 @@ typedef enum { VDEV_PROP_REMOVING, VDEV_PROP_ALLOCATING, VDEV_PROP_FAILFAST, + VDEV_PROP_CHECKSUM_N, + VDEV_PROP_CHECKSUM_T, + VDEV_PROP_IO_N, + VDEV_PROP_IO_T, VDEV_NUM_PROPS } vdev_prop_t; @@ -1657,6 +1661,7 @@ typedef enum { #define ZFS_ONLINE_UNSPARE 0x2 #define ZFS_ONLINE_FORCEFAULT 0x4 #define 
ZFS_ONLINE_EXPAND 0x8 +#define ZFS_ONLINE_SPARE 0x10 #define ZFS_OFFLINE_TEMPORARY 0x1 /* diff --git a/sys/contrib/openzfs/include/sys/spa.h b/sys/contrib/openzfs/include/sys/spa.h index b260dd32820e..500eb3491a99 100644 --- a/sys/contrib/openzfs/include/sys/spa.h +++ b/sys/contrib/openzfs/include/sys/spa.h @@ -1146,7 +1146,7 @@ extern nvlist_t *zfs_event_create(spa_t *spa, vdev_t *vd, const char *type, extern void zfs_post_remove(spa_t *spa, vdev_t *vd); extern void zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate); extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd); -extern uint64_t spa_get_errlog_size(spa_t *spa); +extern uint64_t spa_approx_errlog_size(spa_t *spa); extern int spa_get_errlog(spa_t *spa, void *uaddr, uint64_t *count); extern void spa_errlog_rotate(spa_t *spa); extern void spa_errlog_drain(spa_t *spa); diff --git a/sys/contrib/openzfs/include/sys/vdev_impl.h b/sys/contrib/openzfs/include/sys/vdev_impl.h index 3f4b78b947a3..73c0206efa2e 100644 --- a/sys/contrib/openzfs/include/sys/vdev_impl.h +++ b/sys/contrib/openzfs/include/sys/vdev_impl.h @@ -469,6 +469,14 @@ struct vdev { zfs_ratelimit_t vdev_delay_rl; zfs_ratelimit_t vdev_deadman_rl; zfs_ratelimit_t vdev_checksum_rl; + + /* + * Checksum and IO thresholds for tuning ZED + */ + uint64_t vdev_checksum_n; + uint64_t vdev_checksum_t; + uint64_t vdev_io_n; + uint64_t vdev_io_t; }; #define VDEV_PAD_SIZE (8 << 10) diff --git a/sys/contrib/openzfs/include/sys/zfs_znode.h b/sys/contrib/openzfs/include/sys/zfs_znode.h index 3a4a0c3cb539..de38f56dc32d 100644 --- a/sys/contrib/openzfs/include/sys/zfs_znode.h +++ b/sys/contrib/openzfs/include/sys/zfs_znode.h @@ -272,6 +272,8 @@ extern int zfs_freesp(znode_t *, uint64_t, uint64_t, int, boolean_t); extern void zfs_znode_init(void); extern void zfs_znode_fini(void); extern int zfs_znode_hold_compare(const void *, const void *); +extern znode_hold_t *zfs_znode_hold_enter(zfsvfs_t *, uint64_t); +extern void zfs_znode_hold_exit(zfsvfs_t *, 
znode_hold_t *); extern int zfs_zget(zfsvfs_t *, uint64_t, znode_t **); extern int zfs_rezget(znode_t *); extern void zfs_zinactive(znode_t *); diff --git a/sys/contrib/openzfs/include/sys/zrlock.h b/sys/contrib/openzfs/include/sys/zrlock.h index cddaae52dc38..e2a7a254a6e0 100644 --- a/sys/contrib/openzfs/include/sys/zrlock.h +++ b/sys/contrib/openzfs/include/sys/zrlock.h @@ -34,9 +34,8 @@ extern "C" { typedef struct zrlock { kmutex_t zr_mtx; - volatile int32_t zr_refcount; kcondvar_t zr_cv; - uint16_t zr_pad; + volatile int32_t zr_refcount; #ifdef ZFS_DEBUG kthread_t *zr_owner; const char *zr_caller; diff --git a/sys/contrib/openzfs/lib/libicp/Makefile.am b/sys/contrib/openzfs/lib/libicp/Makefile.am index b7f1d0e1b1e4..7c6cf71de242 100644 --- a/sys/contrib/openzfs/lib/libicp/Makefile.am +++ b/sys/contrib/openzfs/lib/libicp/Makefile.am @@ -67,3 +67,4 @@ nodist_libicp_la_SOURCES += \ module/icp/asm-x86_64/blake3/blake3_sse2.S \ module/icp/asm-x86_64/blake3/blake3_sse41.S endif + diff --git a/sys/contrib/openzfs/lib/libspl/include/Makefile.am b/sys/contrib/openzfs/lib/libspl/include/Makefile.am index 6f0e1818d22e..c8b41bbc296e 100644 --- a/sys/contrib/openzfs/lib/libspl/include/Makefile.am +++ b/sys/contrib/openzfs/lib/libspl/include/Makefile.am @@ -26,6 +26,7 @@ libspl_sysdir = $(libspldir)/sys libspl_sys_HEADERS = \ %D%/sys/acl.h \ %D%/sys/acl_impl.h \ + %D%/sys/asm_linkage.h \ %D%/sys/callb.h \ %D%/sys/cmn_err.h \ %D%/sys/cred.h \ @@ -62,6 +63,8 @@ libspl_sys_HEADERS = \ %D%/sys/wmsum.h \ %D%/sys/zone.h +libspl_ia32dir = $(libspldir)/sys/ia32 + if BUILD_LINUX libspl_sys_HEADERS += \ %D%/os/linux/sys/byteorder.h \ @@ -72,6 +75,9 @@ libspl_sys_HEADERS += \ %D%/os/linux/sys/stat.h \ %D%/os/linux/sys/sysmacros.h \ %D%/os/linux/sys/zfs_context_os.h + +libspl_ia32_HEADERS = \ + %D%/os/linux/sys/ia32/asm_linkage.h endif if BUILD_FREEBSD @@ -86,9 +92,13 @@ libspl_sys_HEADERS += \ %D%/os/freebsd/sys/sysmacros.h \ %D%/os/freebsd/sys/vfs.h \ 
%D%/os/freebsd/sys/zfs_context_os.h + +libspl_ia32_HEADERS = \ + %D%/os/freebsd/sys/ia32/asm_linkage.h endif libspl_sys_dktpdir = $(libspl_sysdir)/dktp libspl_sys_dktp_HEADERS = \ %D%/sys/dktp/fdisk.h + diff --git a/sys/contrib/openzfs/lib/libspl/include/os/freebsd/sys/ia32/asm_linkage.h b/sys/contrib/openzfs/lib/libspl/include/os/freebsd/sys/ia32/asm_linkage.h new file mode 100644 index 000000000000..9964f183cc68 --- /dev/null +++ b/sys/contrib/openzfs/lib/libspl/include/os/freebsd/sys/ia32/asm_linkage.h @@ -0,0 +1,184 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _IA32_SYS_ASM_LINKAGE_H +#define _IA32_SYS_ASM_LINKAGE_H + +#if defined(__linux__) && defined(CONFIG_SLS) +#define RET ret; int3 +#else +#define RET ret +#endif + +/* Tell compiler to call assembler like Unix */ +#undef ASMABI +#define ASMABI __attribute__((sysv_abi)) + +#define ENDBR + +#define SECTION_TEXT .text +#define SECTION_STATIC .data + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _ASM /* The remainder of this file is only for assembly files */ + +/* + * make annoying differences in assembler syntax go away + */ + +/* + * D16 and A16 are used to insert instructions prefixes; the + * macros help the assembler code be slightly more portable. + */ +#if !defined(__GNUC_AS__) +/* + * /usr/ccs/bin/as prefixes are parsed as separate instructions + */ +#define D16 data16; +#define A16 addr16; + +/* + * (There are some weird constructs in constant expressions) + */ +#define _CONST(const) [const] +#define _BITNOT(const) -1!_CONST(const) +#define _MUL(a, b) _CONST(a \* b) + +#else +/* + * Why not use the 'data16' and 'addr16' prefixes .. well, the + * assembler doesn't quite believe in real mode, and thus argues with + * us about what we're trying to do. + */ +#define D16 .byte 0x66; +#define A16 .byte 0x67; + +#define _CONST(const) (const) +#define _BITNOT(const) ~_CONST(const) +#define _MUL(a, b) _CONST(a * b) + +#endif + +/* + * C pointers are different sizes between i386 and amd64. + * These constants can be used to compute offsets into pointer arrays. + */ +#if defined(__amd64) +#define CLONGSHIFT 3 +#define CLONGSIZE 8 +#define CLONGMASK 7 +#elif defined(__i386) +#define CLONGSHIFT 2 +#define CLONGSIZE 4 +#define CLONGMASK 3 +#endif + +/* + * Since we know we're either ILP32 or LP64 .. 
+ */ +#define CPTRSHIFT CLONGSHIFT +#define CPTRSIZE CLONGSIZE +#define CPTRMASK CLONGMASK + +#if CPTRSIZE != (1 << CPTRSHIFT) || CLONGSIZE != (1 << CLONGSHIFT) +#error "inconsistent shift constants" +#endif + +#if CPTRMASK != (CPTRSIZE - 1) || CLONGMASK != (CLONGSIZE - 1) +#error "inconsistent mask constants" +#endif + +#define ASM_ENTRY_ALIGN 16 + +/* + * SSE register alignment and save areas + */ + +#define XMM_SIZE 16 +#define XMM_ALIGN 16 + +/* + * ENTRY provides the standard procedure entry code and an easy way to + * insert the calls to mcount for profiling. ENTRY_NP is identical, but + * never calls mcount. + */ +#define ENTRY(x) \ + .text; \ + .balign ASM_ENTRY_ALIGN; \ + .globl x; \ +x: MCOUNT(x) + +#define ENTRY_NP(x) \ + .text; \ + .balign ASM_ENTRY_ALIGN; \ + .globl x; \ +x: + +#define ENTRY_ALIGN(x, a) \ + .text; \ + .balign a; \ + .globl x; \ +x: + +#define FUNCTION(x) \ + .type x, @function; \ +x: + +/* + * ENTRY2 is identical to ENTRY but provides two labels for the entry point. + */ +#define ENTRY2(x, y) \ + .text; \ + .balign ASM_ENTRY_ALIGN; \ + .globl x, y; \ +x:; \ +y: MCOUNT(x) + +#define ENTRY_NP2(x, y) \ + .text; \ + .balign ASM_ENTRY_ALIGN; \ + .globl x, y; \ +x:; \ +y: + + +/* + * SET_SIZE trails a function and set the size for the ELF symbol table. 
+ */ +#define SET_SIZE(x) + +#define SET_OBJ(x) + +#endif /* _ASM */ + +#ifdef __cplusplus +} +#endif + +#endif /* _IA32_SYS_ASM_LINKAGE_H */ diff --git a/sys/contrib/openzfs/module/icp/include/sys/ia32/asm_linkage.h b/sys/contrib/openzfs/lib/libspl/include/os/linux/sys/ia32/asm_linkage.h similarity index 89% rename from sys/contrib/openzfs/module/icp/include/sys/ia32/asm_linkage.h rename to sys/contrib/openzfs/lib/libspl/include/os/linux/sys/ia32/asm_linkage.h index e3e769ffd858..f07596123341 100644 --- a/sys/contrib/openzfs/module/icp/include/sys/ia32/asm_linkage.h +++ b/sys/contrib/openzfs/lib/libspl/include/os/linux/sys/ia32/asm_linkage.h @@ -27,9 +27,6 @@ #ifndef _IA32_SYS_ASM_LINKAGE_H #define _IA32_SYS_ASM_LINKAGE_H -#include -#include - #if defined(_KERNEL) && defined(__linux__) #include #endif @@ -56,6 +53,13 @@ #define RET ret #endif +/* You can set to nothing on Unix platforms */ +#undef ASMABI +#define ASMABI __attribute__((sysv_abi)) + +#define SECTION_TEXT .text +#define SECTION_STATIC .section .rodata + #ifdef __cplusplus extern "C" { #endif @@ -145,24 +149,35 @@ extern "C" { #undef ENTRY #define ENTRY(x) \ .text; \ - .align ASM_ENTRY_ALIGN; \ + .balign ASM_ENTRY_ALIGN; \ .globl x; \ .type x, @function; \ x: MCOUNT(x) #define ENTRY_NP(x) \ .text; \ - .align ASM_ENTRY_ALIGN; \ + .balign ASM_ENTRY_ALIGN; \ .globl x; \ .type x, @function; \ x: +#define ENTRY_ALIGN(x, a) \ + .text; \ + .balign a; \ + .globl x; \ + .type x, @function; \ +x: + +#define FUNCTION(x) \ + .type x, @function; \ +x: + /* * ENTRY2 is identical to ENTRY but provides two labels for the entry point. 
*/ #define ENTRY2(x, y) \ .text; \ - .align ASM_ENTRY_ALIGN; \ + .balign ASM_ENTRY_ALIGN; \ .globl x, y; \ .type x, @function; \ .type y, @function; \ @@ -171,7 +186,7 @@ y: MCOUNT(x) #define ENTRY_NP2(x, y) \ .text; \ - .align ASM_ENTRY_ALIGN; \ + .balign ASM_ENTRY_ALIGN; \ .globl x, y; \ .type x, @function; \ .type y, @function; \ @@ -185,6 +200,8 @@ x:; \ #define SET_SIZE(x) \ .size x, [.-x] +#define SET_OBJ(x) .type x, @object + #endif /* _ASM */ #ifdef __cplusplus diff --git a/sys/contrib/openzfs/module/icp/include/sys/asm_linkage.h b/sys/contrib/openzfs/lib/libspl/include/sys/asm_linkage.h similarity index 100% rename from sys/contrib/openzfs/module/icp/include/sys/asm_linkage.h rename to sys/contrib/openzfs/lib/libspl/include/sys/asm_linkage.h diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs.abi b/sys/contrib/openzfs/lib/libzfs/libzfs.abi index 98873784e7dc..16fea63f895c 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs.abi +++ b/sys/contrib/openzfs/lib/libzfs/libzfs.abi @@ -416,6 +416,8 @@ + + @@ -2925,13 +2927,14 @@ + - + @@ -2940,12 +2943,14 @@ + + @@ -2954,6 +2959,7 @@ + @@ -2961,12 +2967,14 @@ + + @@ -3215,7 +3223,11 @@ - + + + + + @@ -3828,6 +3840,11 @@ + + + + + @@ -4519,6 +4536,12 @@ + + + + + + diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_changelist.c b/sys/contrib/openzfs/lib/libzfs/libzfs_changelist.c index e5e735d38e00..d7ea60822419 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_changelist.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_changelist.c @@ -552,7 +552,7 @@ change_one(zfs_handle_t *zhp, void *data) } if (!clp->cl_alldependents) - ret = zfs_iter_children(zhp, change_one, data); + ret = zfs_iter_children(zhp, 0, change_one, data); /* * If we added the handle to the changelist, we will re-use it @@ -721,11 +721,11 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int gather_flags, return (NULL); } } else if (clp->cl_alldependents) { - if (zfs_iter_dependents(zhp, B_TRUE, change_one, clp) != 0) { + if 
(zfs_iter_dependents(zhp, 0, B_TRUE, change_one, clp) != 0) { changelist_free(clp); return (NULL); } - } else if (zfs_iter_children(zhp, change_one, clp) != 0) { + } else if (zfs_iter_children(zhp, 0, change_one, clp) != 0) { changelist_free(clp); return (NULL); } diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_crypto.c b/sys/contrib/openzfs/lib/libzfs/libzfs_crypto.c index c241aeaa4da0..3ef883701082 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_crypto.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_crypto.c @@ -1226,7 +1226,7 @@ load_keys_cb(zfs_handle_t *zhp, void *arg) cb->cb_numfailed++; out: - (void) zfs_iter_filesystems(zhp, load_keys_cb, cb); + (void) zfs_iter_filesystems(zhp, 0, load_keys_cb, cb); zfs_close(zhp); /* always return 0, since this function is best effort */ diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c b/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c index 87bc4ea66c5b..9ecb1ac5c6fb 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c @@ -527,7 +527,30 @@ make_dataset_simple_handle_zc(zfs_handle_t *pzhp, zfs_cmd_t *zc) zhp->zfs_head_type = pzhp->zfs_type; zhp->zfs_type = ZFS_TYPE_SNAPSHOT; zhp->zpool_hdl = zpool_handle(zhp); - zhp->zfs_dmustats = zc->zc_objset_stats; + + if (zc->zc_objset_stats.dds_creation_txg != 0) { + /* structure assignment */ + zhp->zfs_dmustats = zc->zc_objset_stats; + } else { + if (get_stats_ioctl(zhp, zc) == -1) { + zcmd_free_nvlists(zc); + free(zhp); + return (NULL); + } + if (make_dataset_handle_common(zhp, zc) == -1) { + zcmd_free_nvlists(zc); + free(zhp); + return (NULL); + } + } + + if (zhp->zfs_dmustats.dds_is_snapshot || + strchr(zc->zc_name, '@') != NULL) + zhp->zfs_type = ZFS_TYPE_SNAPSHOT; + else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL) + zhp->zfs_type = ZFS_TYPE_VOLUME; + else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS) + zhp->zfs_type = ZFS_TYPE_FILESYSTEM; return (zhp); } @@ -690,6 +713,7 @@ 
zfs_open(libzfs_handle_t *hdl, const char *path, int types) */ if (!zfs_validate_name(hdl, path, types, B_FALSE)) { (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf); + errno = EINVAL; return (NULL); } @@ -733,10 +757,11 @@ zfs_open(libzfs_handle_t *hdl, const char *path, int types) * Iterate bookmarks to find the right one. */ errno = 0; - if ((zfs_iter_bookmarks(pzhp, zfs_open_bookmarks_cb, + if ((zfs_iter_bookmarks(pzhp, 0, zfs_open_bookmarks_cb, &cb_data) == 0) && (cb_data.zhp == NULL)) { (void) zfs_error(hdl, EZFS_NOENT, errbuf); zfs_close(pzhp); + errno = ENOENT; return (NULL); } if (cb_data.zhp == NULL) { @@ -755,6 +780,7 @@ zfs_open(libzfs_handle_t *hdl, const char *path, int types) if (!(types & zhp->zfs_type)) { (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); zfs_close(zhp); + errno = EINVAL; return (NULL); } @@ -2083,7 +2109,8 @@ getprop_string(zfs_handle_t *zhp, zfs_prop_t prop, char **source) static boolean_t zfs_is_recvd_props_mode(zfs_handle_t *zhp) { - return (zhp->zfs_props == zhp->zfs_recvd_props); + return (zhp->zfs_props != NULL && + zhp->zfs_props == zhp->zfs_recvd_props); } static void @@ -2285,19 +2312,28 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src, *val = zhp->zfs_dmustats.dds_redacted; break; + case ZFS_PROP_GUID: + if (zhp->zfs_dmustats.dds_guid != 0) + *val = zhp->zfs_dmustats.dds_guid; + else + *val = getprop_uint64(zhp, prop, source); + break; + case ZFS_PROP_CREATETXG: /* * We can directly read createtxg property from zfs * handle for Filesystem, Snapshot and ZVOL types. 
*/ - if ((zhp->zfs_type == ZFS_TYPE_FILESYSTEM) || + if (((zhp->zfs_type == ZFS_TYPE_FILESYSTEM) || (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) || - (zhp->zfs_type == ZFS_TYPE_VOLUME)) { + (zhp->zfs_type == ZFS_TYPE_VOLUME)) && + (zhp->zfs_dmustats.dds_creation_txg != 0)) { *val = zhp->zfs_dmustats.dds_creation_txg; break; + } else { + *val = getprop_uint64(zhp, prop, source); } zfs_fallthrough; - default: switch (zfs_prop_get_type(prop)) { case PROP_TYPE_NUMBER: @@ -2440,7 +2476,7 @@ get_clones_cb(zfs_handle_t *zhp, void *arg) } out: - (void) zfs_iter_children(zhp, get_clones_cb, gca); + (void) zfs_iter_children(zhp, 0, get_clones_cb, gca); zfs_close(zhp); return (0); } @@ -2725,7 +2761,13 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, break; case ZFS_PROP_ORIGIN: - str = getprop_string(zhp, prop, &source); + if (*zhp->zfs_dmustats.dds_origin != '\0') { + str = (char *)&zhp->zfs_dmustats.dds_origin; + } else { + str = getprop_string(zhp, prop, &source); + } + if (str == NULL || *str == '\0') + str = zfs_prop_default_string(prop); if (str == NULL) return (-1); (void) strlcpy(propbuf, str, proplen); @@ -3883,7 +3925,7 @@ zfs_check_snap_cb(zfs_handle_t *zhp, void *arg) if (lzc_exists(name)) fnvlist_add_boolean(dd->nvl, name); - rv = zfs_iter_filesystems(zhp, zfs_check_snap_cb, dd); + rv = zfs_iter_filesystems(zhp, 0, zfs_check_snap_cb, dd); zfs_close(zhp); return (rv); } @@ -4121,7 +4163,7 @@ zfs_snapshot_cb(zfs_handle_t *zhp, void *arg) fnvlist_add_boolean(sd->sd_nvl, name); - rv = zfs_iter_filesystems(zhp, zfs_snapshot_cb, sd); + rv = zfs_iter_filesystems(zhp, 0, zfs_snapshot_cb, sd); } zfs_close(zhp); @@ -4298,7 +4340,7 @@ rollback_destroy(zfs_handle_t *zhp, void *data) rollback_data_t *cbp = data; if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > cbp->cb_create) { - cbp->cb_error |= zfs_iter_dependents(zhp, B_FALSE, + cbp->cb_error |= zfs_iter_dependents(zhp, 0, B_FALSE, rollback_destroy_dependent, cbp); cbp->cb_error |= 
zfs_destroy(zhp, B_FALSE); @@ -4338,10 +4380,10 @@ zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, boolean_t force) if (cb.cb_create > 0) min_txg = cb.cb_create; - (void) zfs_iter_snapshots(zhp, B_FALSE, rollback_destroy, &cb, + (void) zfs_iter_snapshots(zhp, 0, rollback_destroy, &cb, min_txg, 0); - (void) zfs_iter_bookmarks(zhp, rollback_destroy, &cb); + (void) zfs_iter_bookmarks(zhp, 0, rollback_destroy, &cb); if (cb.cb_error) return (-1); @@ -4922,7 +4964,7 @@ zfs_hold_one(zfs_handle_t *zhp, void *arg) fnvlist_add_string(ha->nvl, name, ha->tag); if (ha->recursive) - rv = zfs_iter_filesystems(zhp, zfs_hold_one, ha); + rv = zfs_iter_filesystems(zhp, 0, zfs_hold_one, ha); zfs_close(zhp); return (rv); } @@ -5053,7 +5095,7 @@ zfs_release_one(zfs_handle_t *zhp, void *arg) } if (ha->recursive) - rv = zfs_iter_filesystems(zhp, zfs_release_one, ha); + rv = zfs_iter_filesystems(zhp, 0, zfs_release_one, ha); zfs_close(zhp); return (rv); } diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_diff.c b/sys/contrib/openzfs/lib/libzfs/libzfs_diff.c index 84e140ede665..1330e7c3052a 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_diff.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_diff.c @@ -44,6 +44,7 @@ #include #include #include +#include #include "libzfs_impl.h" #define ZDIFF_SNAPDIR "/.zfs/snapshot/" @@ -54,6 +55,10 @@ #define ZDIFF_REMOVED '-' #define ZDIFF_RENAMED "R" +#define ZDIFF_ADDED_COLOR ANSI_GREEN +#define ZDIFF_MODIFIED_COLOR ANSI_YELLOW +#define ZDIFF_REMOVED_COLOR ANSI_RED +#define ZDIFF_RENAMED_COLOR ANSI_BLUE /* * Given a {dsname, object id}, get the object path @@ -128,6 +133,25 @@ stream_bytes(FILE *fp, const char *string) } } +/* + * Takes the type of change (like `print_file`), outputs the appropriate color + */ +static const char * +type_to_color(char type) +{ + if (type == '+') + return (ZDIFF_ADDED_COLOR); + else if (type == '-') + return (ZDIFF_REMOVED_COLOR); + else if (type == 'M') + return (ZDIFF_MODIFIED_COLOR); + else if (type == 'R') + return 
(ZDIFF_RENAMED_COLOR); + else + return (NULL); +} + + static char get_what(mode_t what) { @@ -175,6 +199,8 @@ static void print_rename(FILE *fp, differ_info_t *di, const char *old, const char *new, zfs_stat_t *isb) { + if (isatty(fileno(fp))) + color_start(ZDIFF_RENAMED_COLOR); if (di->timestamped) (void) fprintf(fp, "%10lld.%09lld\t", (longlong_t)isb->zs_ctime[0], @@ -186,12 +212,18 @@ print_rename(FILE *fp, differ_info_t *di, const char *old, const char *new, (void) fputs(di->scripted ? "\t" : " -> ", fp); print_cmn(fp, di, new); (void) fputc('\n', fp); + + if (isatty(fileno(fp))) + color_end(); } static void print_link_change(FILE *fp, differ_info_t *di, int delta, const char *file, zfs_stat_t *isb) { + if (isatty(fileno(fp))) + color_start(ZDIFF_MODIFIED_COLOR); + if (di->timestamped) (void) fprintf(fp, "%10lld.%09lld\t", (longlong_t)isb->zs_ctime[0], @@ -201,12 +233,17 @@ print_link_change(FILE *fp, differ_info_t *di, int delta, const char *file, (void) fprintf(fp, "%c\t", get_what(isb->zs_mode)); print_cmn(fp, di, file); (void) fprintf(fp, "\t(%+d)\n", delta); + if (isatty(fileno(fp))) + color_end(); } static void print_file(FILE *fp, differ_info_t *di, char type, const char *file, zfs_stat_t *isb) { + if (isatty(fileno(fp))) + color_start(type_to_color(type)); + if (di->timestamped) (void) fprintf(fp, "%10lld.%09lld\t", (longlong_t)isb->zs_ctime[0], @@ -216,6 +253,9 @@ print_file(FILE *fp, differ_info_t *di, char type, const char *file, (void) fprintf(fp, "%c\t", get_what(isb->zs_mode)); print_cmn(fp, di, file); (void) fputc('\n', fp); + + if (isatty(fileno(fp))) + color_end(); } static int diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_iter.c b/sys/contrib/openzfs/lib/libzfs/libzfs_iter.c index a716521ab17d..55cb7a8b5035 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_iter.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_iter.c @@ -39,7 +39,8 @@ #include "libzfs_impl.h" static int -zfs_iter_clones(zfs_handle_t *zhp, zfs_iter_f func, void *data) 
+zfs_iter_clones(zfs_handle_t *zhp, int flags __maybe_unused, zfs_iter_f func, + void *data) { nvlist_t *nvl = zfs_get_clones_nvl(zhp); nvpair_t *pair; @@ -69,6 +70,7 @@ zfs_do_list_ioctl(zfs_handle_t *zhp, int arg, zfs_cmd_t *zc) orig_cookie = zc->zc_cookie; top: (void) strlcpy(zc->zc_name, zhp->zfs_name, sizeof (zc->zc_name)); + zc->zc_objset_stats.dds_creation_txg = 0; rc = zfs_ioctl(zhp->zfs_hdl, arg, zc); if (rc == -1) { @@ -101,7 +103,7 @@ zfs_do_list_ioctl(zfs_handle_t *zhp, int arg, zfs_cmd_t *zc) * Iterate over all child filesystems */ int -zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data) +zfs_iter_filesystems(zfs_handle_t *zhp, int flags, zfs_iter_f func, void *data) { zfs_cmd_t zc = {"\0"}; zfs_handle_t *nzhp; @@ -112,16 +114,21 @@ zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data) zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0); + if ((flags & ZFS_ITER_SIMPLE) == ZFS_ITER_SIMPLE) + zc.zc_simple = B_TRUE; + while ((ret = zfs_do_list_ioctl(zhp, ZFS_IOC_DATASET_LIST_NEXT, &zc)) == 0) { + if (zc.zc_simple) + nzhp = make_dataset_simple_handle_zc(zhp, &zc); + else + nzhp = make_dataset_handle_zc(zhp->zfs_hdl, &zc); /* * Silently ignore errors, as the only plausible explanation is * that the pool has since been removed. 
*/ - if ((nzhp = make_dataset_handle_zc(zhp->zfs_hdl, - &zc)) == NULL) { + if (nzhp == NULL) continue; - } if ((ret = func(nzhp, data)) != 0) { zcmd_free_nvlists(&zc); @@ -136,7 +143,7 @@ zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data) * Iterate over all snapshots */ int -zfs_iter_snapshots(zfs_handle_t *zhp, boolean_t simple, zfs_iter_f func, +zfs_iter_snapshots(zfs_handle_t *zhp, int flags, zfs_iter_f func, void *data, uint64_t min_txg, uint64_t max_txg) { zfs_cmd_t zc = {"\0"}; @@ -148,7 +155,7 @@ zfs_iter_snapshots(zfs_handle_t *zhp, boolean_t simple, zfs_iter_f func, zhp->zfs_type == ZFS_TYPE_BOOKMARK) return (0); - zc.zc_simple = simple; + zc.zc_simple = (flags & ZFS_ITER_SIMPLE) != 0; zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0); @@ -168,7 +175,7 @@ zfs_iter_snapshots(zfs_handle_t *zhp, boolean_t simple, zfs_iter_f func, while ((ret = zfs_do_list_ioctl(zhp, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc)) == 0) { - if (simple) + if (zc.zc_simple) nzhp = make_dataset_simple_handle_zc(zhp, &zc); else nzhp = make_dataset_handle_zc(zhp->zfs_hdl, &zc); @@ -190,7 +197,8 @@ zfs_iter_snapshots(zfs_handle_t *zhp, boolean_t simple, zfs_iter_f func, * Iterate over all bookmarks */ int -zfs_iter_bookmarks(zfs_handle_t *zhp, zfs_iter_f func, void *data) +zfs_iter_bookmarks(zfs_handle_t *zhp, int flags __maybe_unused, + zfs_iter_f func, void *data) { zfs_handle_t *nzhp; nvlist_t *props = NULL; @@ -297,8 +305,8 @@ zfs_snapshot_compare(const void *larg, const void *rarg) } int -zfs_iter_snapshots_sorted(zfs_handle_t *zhp, zfs_iter_f callback, void *data, - uint64_t min_txg, uint64_t max_txg) +zfs_iter_snapshots_sorted(zfs_handle_t *zhp, int flags, zfs_iter_f callback, + void *data, uint64_t min_txg, uint64_t max_txg) { int ret = 0; zfs_node_t *node; @@ -308,7 +316,7 @@ zfs_iter_snapshots_sorted(zfs_handle_t *zhp, zfs_iter_f callback, void *data, avl_create(&avl, zfs_snapshot_compare, sizeof (zfs_node_t), offsetof(zfs_node_t, zn_avlnode)); - ret = 
zfs_iter_snapshots(zhp, B_FALSE, zfs_sort_snaps, &avl, min_txg, + ret = zfs_iter_snapshots(zhp, flags, zfs_sort_snaps, &avl, min_txg, max_txg); for (node = avl_first(&avl); node != NULL; node = AVL_NEXT(&avl, node)) @@ -371,7 +379,7 @@ snapspec_cb(zfs_handle_t *zhp, void *arg) * return ENOENT at the end. */ int -zfs_iter_snapspec(zfs_handle_t *fs_zhp, const char *spec_orig, +zfs_iter_snapspec(zfs_handle_t *fs_zhp, int flags, const char *spec_orig, zfs_iter_f func, void *arg) { char *buf, *comma_separated, *cp; @@ -411,7 +419,7 @@ zfs_iter_snapspec(zfs_handle_t *fs_zhp, const char *spec_orig, } } - err = zfs_iter_snapshots_sorted(fs_zhp, + err = zfs_iter_snapshots_sorted(fs_zhp, flags, snapspec_cb, &ssa, 0, 0); if (ret == 0) ret = err; @@ -448,14 +456,14 @@ zfs_iter_snapspec(zfs_handle_t *fs_zhp, const char *spec_orig, * and as close as possible. */ int -zfs_iter_children(zfs_handle_t *zhp, zfs_iter_f func, void *data) +zfs_iter_children(zfs_handle_t *zhp, int flags, zfs_iter_f func, void *data) { int ret; - if ((ret = zfs_iter_snapshots(zhp, B_FALSE, func, data, 0, 0)) != 0) + if ((ret = zfs_iter_snapshots(zhp, flags, func, data, 0, 0)) != 0) return (ret); - return (zfs_iter_filesystems(zhp, func, data)); + return (zfs_iter_filesystems(zhp, flags, func, data)); } @@ -466,6 +474,7 @@ typedef struct iter_stack_frame { typedef struct iter_dependents_arg { boolean_t first; + int flags; boolean_t allowrecursion; iter_stack_frame_t *stack; zfs_iter_f func; @@ -481,7 +490,7 @@ iter_dependents_cb(zfs_handle_t *zhp, void *arg) ida->first = B_FALSE; if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) { - err = zfs_iter_clones(zhp, iter_dependents_cb, ida); + err = zfs_iter_clones(zhp, ida->flags, iter_dependents_cb, ida); } else if (zhp->zfs_type != ZFS_TYPE_BOOKMARK) { iter_stack_frame_t isf; iter_stack_frame_t *f; @@ -515,9 +524,10 @@ iter_dependents_cb(zfs_handle_t *zhp, void *arg) isf.zhp = zhp; isf.next = ida->stack; ida->stack = &isf; - err = zfs_iter_filesystems(zhp, 
iter_dependents_cb, ida); + err = zfs_iter_filesystems(zhp, ida->flags, + iter_dependents_cb, ida); if (err == 0) - err = zfs_iter_snapshots(zhp, B_FALSE, + err = zfs_iter_snapshots(zhp, ida->flags, iter_dependents_cb, ida, 0, 0); ida->stack = isf.next; } @@ -531,10 +541,11 @@ iter_dependents_cb(zfs_handle_t *zhp, void *arg) } int -zfs_iter_dependents(zfs_handle_t *zhp, boolean_t allowrecursion, +zfs_iter_dependents(zfs_handle_t *zhp, int flags, boolean_t allowrecursion, zfs_iter_f func, void *data) { iter_dependents_arg_t ida; + ida.flags = flags; ida.allowrecursion = allowrecursion; ida.stack = NULL; ida.func = func; diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_mount.c b/sys/contrib/openzfs/lib/libzfs/libzfs_mount.c index 44f7d698c82c..57737bc6c01a 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_mount.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_mount.c @@ -940,7 +940,7 @@ zfs_iter_cb(zfs_handle_t *zhp, void *data) } libzfs_add_handle(cbp, zhp); - if (zfs_iter_filesystems(zhp, zfs_iter_cb, cbp) != 0) { + if (zfs_iter_filesystems(zhp, 0, zfs_iter_cb, cbp) != 0) { zfs_close(zhp); return (-1); } @@ -1289,7 +1289,7 @@ zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags) * over all child filesystems. 
*/ libzfs_add_handle(&cb, zfsp); - if (zfs_iter_filesystems(zfsp, zfs_iter_cb, &cb) != 0) + if (zfs_iter_filesystems(zfsp, 0, zfs_iter_cb, &cb) != 0) goto out; /* diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c b/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c index 7f7e19a090bc..b3e12bd84a2d 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c @@ -2961,7 +2961,7 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags, zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID); - if (avail_spare) + if (!(flags & ZFS_ONLINE_SPARE) && avail_spare) return (zfs_error(hdl, EZFS_ISSPARE, errbuf)); #ifndef __FreeBSD__ @@ -3098,9 +3098,6 @@ zpool_vdev_remove_wanted(zpool_handle_t *zhp, const char *path) zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID); - if (avail_spare) - return (zfs_error(hdl, EZFS_ISSPARE, errbuf)); - zc.zc_cookie = VDEV_STATE_REMOVED; if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) @@ -4133,33 +4130,28 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp) { zfs_cmd_t zc = {"\0"}; libzfs_handle_t *hdl = zhp->zpool_hdl; - uint64_t count; - zbookmark_phys_t *zb = NULL; - int i; + zbookmark_phys_t *buf; + uint64_t buflen = 10000; /* approx. 1MB of RAM */ + + if (fnvlist_lookup_uint64(zhp->zpool_config, + ZPOOL_CONFIG_ERRCOUNT) == 0) + return (0); /* - * Retrieve the raw error list from the kernel. If the number of errors - * has increased, allocate more space and continue until we get the - * entire list. + * Retrieve the raw error list from the kernel. If it doesn't fit, + * allocate a larger buffer and retry. 
*/ - count = fnvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT); - if (count == 0) - return (0); - zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl, - count * sizeof (zbookmark_phys_t)); - zc.zc_nvlist_dst_size = count; (void) strcpy(zc.zc_name, zhp->zpool_name); for (;;) { + buf = zfs_alloc(zhp->zpool_hdl, + buflen * sizeof (zbookmark_phys_t)); + zc.zc_nvlist_dst = (uintptr_t)buf; + zc.zc_nvlist_dst_size = buflen; if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_ERROR_LOG, &zc) != 0) { - free((void *)(uintptr_t)zc.zc_nvlist_dst); + free(buf); if (errno == ENOMEM) { - void *dst; - - count = zc.zc_nvlist_dst_size; - dst = zfs_alloc(zhp->zpool_hdl, count * - sizeof (zbookmark_phys_t)); - zc.zc_nvlist_dst = (uintptr_t)dst; + buflen *= 2; } else { return (zpool_standard_error_fmt(hdl, errno, dgettext(TEXT_DOMAIN, "errors: List of " @@ -4177,18 +4169,17 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp) * _not_ copied as part of the process. So we point the start of our * array appropriate and decrement the total number of elements. */ - zb = ((zbookmark_phys_t *)(uintptr_t)zc.zc_nvlist_dst) + - zc.zc_nvlist_dst_size; - count -= zc.zc_nvlist_dst_size; + zbookmark_phys_t *zb = buf + zc.zc_nvlist_dst_size; + uint64_t zblen = buflen - zc.zc_nvlist_dst_size; - qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_mem_compare); + qsort(zb, zblen, sizeof (zbookmark_phys_t), zbookmark_mem_compare); verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0); /* * Fill in the nverrlistp with nvlist's of dataset and object numbers. 
*/ - for (i = 0; i < count; i++) { + for (uint64_t i = 0; i < zblen; i++) { nvlist_t *nv; /* ignoring zb_blkid and zb_level for now */ @@ -4215,11 +4206,11 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp) nvlist_free(nv); } - free((void *)(uintptr_t)zc.zc_nvlist_dst); + free(buf); return (0); nomem: - free((void *)(uintptr_t)zc.zc_nvlist_dst); + free(buf); return (no_memory(zhp->zpool_hdl)); } @@ -5011,6 +5002,17 @@ zpool_get_vdev_prop_value(nvlist_t *nvprop, vdev_prop_t prop, char *prop_name, (u_longlong_t)intval); } break; + case VDEV_PROP_CHECKSUM_N: + case VDEV_PROP_CHECKSUM_T: + case VDEV_PROP_IO_N: + case VDEV_PROP_IO_T: + if (intval == UINT64_MAX) { + (void) strlcpy(buf, "-", len); + } else { + (void) snprintf(buf, len, "%llu", + (u_longlong_t)intval); + } + break; case VDEV_PROP_FRAGMENTATION: if (intval == UINT64_MAX) { (void) strlcpy(buf, "-", len); diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c b/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c index b53acdcea73e..038613a1fcfa 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c @@ -83,6 +83,8 @@ typedef struct progress_arg { boolean_t pa_parsable; boolean_t pa_estimate; int pa_verbosity; + boolean_t pa_astitle; + uint64_t pa_size; } progress_arg_t; static int @@ -616,10 +618,10 @@ send_iterate_fs(zfs_handle_t *zhp, void *arg) min_txg = fromsnap_txg; if (!sd->replicate && tosnap_txg != 0) max_txg = tosnap_txg; - (void) zfs_iter_snapshots_sorted(zhp, send_iterate_snap, sd, + (void) zfs_iter_snapshots_sorted(zhp, 0, send_iterate_snap, sd, min_txg, max_txg); } else { - char snapname[MAXPATHLEN]; + char snapname[MAXPATHLEN] = { 0 }; zfs_handle_t *snap; (void) snprintf(snapname, sizeof (snapname), "%s@%s", @@ -659,7 +661,7 @@ send_iterate_fs(zfs_handle_t *zhp, void *arg) /* Iterate over children. 
*/ if (sd->recursive) - rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd); + rv = zfs_iter_filesystems(zhp, 0, send_iterate_fs, sd); out: /* Restore saved fields. */ @@ -733,6 +735,7 @@ typedef struct send_dump_data { boolean_t seenfrom, seento, replicate, doall, fromorigin; boolean_t dryrun, parsable, progress, embed_data, std_out; boolean_t large_block, compress, raw, holds; + boolean_t progressastitle; int outfd; boolean_t err; nvlist_t *fss; @@ -931,12 +934,13 @@ send_progress_thread(void *arg) zfs_handle_t *zhp = pa->pa_zhp; uint64_t bytes; uint64_t blocks; + uint64_t total = pa->pa_size / 100; char buf[16]; time_t t; struct tm tm; int err; - if (!pa->pa_parsable) { + if (!pa->pa_parsable && pa->pa_verbosity != 0) { (void) fprintf(stderr, "TIME %s %sSNAPSHOT %s\n", pa->pa_estimate ? "BYTES" : " SENT", @@ -959,6 +963,17 @@ send_progress_thread(void *arg) (void) time(&t); localtime_r(&t, &tm); + if (pa->pa_astitle) { + char buf_bytes[16]; + char buf_size[16]; + int pct; + zfs_nicenum(bytes, buf_bytes, sizeof (buf_bytes)); + zfs_nicenum(pa->pa_size, buf_size, sizeof (buf_size)); + pct = (total > 0) ? bytes / total : 100; + zfs_setproctitle("sending %s (%d%%: %s/%s)", + zhp->zfs_name, MIN(pct, 100), buf_bytes, buf_size); + } + if (pa->pa_verbosity >= 2 && pa->pa_parsable) { (void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%llu\t%s\n", @@ -975,7 +990,7 @@ send_progress_thread(void *arg) (void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n", tm.tm_hour, tm.tm_min, tm.tm_sec, (u_longlong_t)bytes, zhp->zfs_name); - } else { + } else if (pa->pa_verbosity != 0) { zfs_nicebytes(bytes, buf, sizeof (buf)); (void) fprintf(stderr, "%02d:%02d:%02d %5s %s\n", tm.tm_hour, tm.tm_min, tm.tm_sec, @@ -1183,12 +1198,14 @@ dump_snapshot(zfs_handle_t *zhp, void *arg) * If progress reporting is requested, spawn a new thread to * poll ZFS_IOC_SEND_PROGRESS at a regular interval. 
*/ - if (sdd->progress) { + if (sdd->progress || sdd->progressastitle) { pa.pa_zhp = zhp; pa.pa_fd = sdd->outfd; pa.pa_parsable = sdd->parsable; pa.pa_estimate = B_FALSE; pa.pa_verbosity = sdd->verbosity; + pa.pa_size = sdd->size; + pa.pa_astitle = sdd->progressastitle; if ((err = pthread_create(&tid, NULL, send_progress_thread, &pa)) != 0) { @@ -1200,7 +1217,7 @@ dump_snapshot(zfs_handle_t *zhp, void *arg) err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj, fromorigin, sdd->outfd, flags, sdd->debugnv); - if (sdd->progress && + if ((sdd->progress || sdd->progressastitle) && send_progress_thread_exit(zhp->zfs_hdl, tid)) return (-1); } @@ -1274,7 +1291,7 @@ dump_filesystem(zfs_handle_t *zhp, send_dump_data_t *sdd) zhp->zfs_name, sdd->tosnap); } } - rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, sdd, + rv = zfs_iter_snapshots_sorted(zhp, 0, dump_snapshot, sdd, min_txg, max_txg); } else { char snapname[MAXPATHLEN] = { 0 }; @@ -1289,7 +1306,7 @@ dump_filesystem(zfs_handle_t *zhp, send_dump_data_t *sdd) if (snap != NULL) rv = dump_snapshot(snap, sdd); else - rv = -1; + rv = errno; } /* Dump tosnap. */ @@ -1301,7 +1318,7 @@ dump_filesystem(zfs_handle_t *zhp, send_dump_data_t *sdd) if (snap != NULL) rv = dump_snapshot(snap, sdd); else - rv = -1; + rv = errno; } } @@ -1536,7 +1553,7 @@ lzc_flags_from_sendflags(const sendflags_t *flags) static int estimate_size(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags, uint64_t resumeobj, uint64_t resumeoff, uint64_t bytes, - const char *redactbook, char *errbuf) + const char *redactbook, char *errbuf, uint64_t *sizep) { uint64_t size; FILE *fout = flags->dryrun ? 
stdout : stderr; @@ -1544,7 +1561,7 @@ estimate_size(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags, int err = 0; pthread_t ptid; - if (flags->progress) { + if (flags->progress || flags->progressastitle) { pa.pa_zhp = zhp; pa.pa_fd = fd; pa.pa_parsable = flags->parsable; @@ -1563,10 +1580,15 @@ estimate_size(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags, err = lzc_send_space_resume_redacted(zhp->zfs_name, from, lzc_flags_from_sendflags(flags), resumeobj, resumeoff, bytes, redactbook, fd, &size); + *sizep = size; - if (flags->progress && send_progress_thread_exit(zhp->zfs_hdl, ptid)) + if ((flags->progress || flags->progressastitle) && + send_progress_thread_exit(zhp->zfs_hdl, ptid)) return (-1); + if (!flags->progress && !flags->parsable) + return (err); + if (err != 0) { zfs_error_aux(zhp->zfs_hdl, "%s", strerror(err)); return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP, @@ -1743,6 +1765,7 @@ zfs_send_resume_impl_cb_impl(libzfs_handle_t *hdl, sendflags_t *flags, uint64_t *redact_snap_guids = NULL; int num_redact_snaps = 0; char *redact_book = NULL; + uint64_t size = 0; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot resume send")); @@ -1828,7 +1851,7 @@ zfs_send_resume_impl_cb_impl(libzfs_handle_t *hdl, sendflags_t *flags, enum lzc_send_flags lzc_flags = lzc_flags_from_sendflags(flags) | lzc_flags_from_resume_nvl(resume_nvl); - if (flags->verbosity != 0) { + if (flags->verbosity != 0 || flags->progressastitle) { /* * Some of these may have come from the resume token, set them * here for size estimate purposes. 
@@ -1845,7 +1868,7 @@ zfs_send_resume_impl_cb_impl(libzfs_handle_t *hdl, sendflags_t *flags, if (lzc_flags & LZC_SEND_FLAG_SAVED) tmpflags.saved = B_TRUE; error = estimate_size(zhp, fromname, outfd, &tmpflags, - resumeobj, resumeoff, bytes, redact_book, errbuf); + resumeobj, resumeoff, bytes, redact_book, errbuf, &size); } if (!flags->dryrun) { @@ -1855,12 +1878,14 @@ zfs_send_resume_impl_cb_impl(libzfs_handle_t *hdl, sendflags_t *flags, * If progress reporting is requested, spawn a new thread to * poll ZFS_IOC_SEND_PROGRESS at a regular interval. */ - if (flags->progress) { + if (flags->progress || flags->progressastitle) { pa.pa_zhp = zhp; pa.pa_fd = outfd; pa.pa_parsable = flags->parsable; pa.pa_estimate = B_FALSE; pa.pa_verbosity = flags->verbosity; + pa.pa_size = size; + pa.pa_astitle = flags->progressastitle; error = pthread_create(&tid, NULL, send_progress_thread, &pa); @@ -1877,8 +1902,11 @@ zfs_send_resume_impl_cb_impl(libzfs_handle_t *hdl, sendflags_t *flags, if (redact_book != NULL) free(redact_book); - if (flags->progress && send_progress_thread_exit(hdl, tid)) + if ((flags->progressastitle || flags->progress) && + send_progress_thread_exit(hdl, tid)) { + zfs_close(zhp); return (-1); + } char errbuf[ERRBUFLEN]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, @@ -2313,6 +2341,7 @@ zfs_send_cb_impl(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, sdd.verbosity = flags->verbosity; sdd.parsable = flags->parsable; sdd.progress = flags->progress; + sdd.progressastitle = flags->progressastitle; sdd.dryrun = flags->dryrun; sdd.large_block = flags->largeblock; sdd.embed_data = flags->embed_data; @@ -2562,6 +2591,7 @@ zfs_send_one_cb_impl(zfs_handle_t *zhp, const char *from, int fd, char *name = zhp->zfs_name; pthread_t ptid; progress_arg_t pa = { 0 }; + uint64_t size = 0; char errbuf[ERRBUFLEN]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, @@ -2644,9 +2674,9 @@ zfs_send_one_cb_impl(zfs_handle_t *zhp, const char *from, 
int fd, /* * Perform size estimate if verbose was specified. */ - if (flags->verbosity != 0) { + if (flags->verbosity != 0 || flags->progressastitle) { err = estimate_size(zhp, from, fd, flags, 0, 0, 0, redactbook, - errbuf); + errbuf, &size); if (err != 0) return (err); } @@ -2658,12 +2688,14 @@ zfs_send_one_cb_impl(zfs_handle_t *zhp, const char *from, int fd, * If progress reporting is requested, spawn a new thread to poll * ZFS_IOC_SEND_PROGRESS at a regular interval. */ - if (flags->progress) { + if (flags->progress || flags->progressastitle) { pa.pa_zhp = zhp; pa.pa_fd = fd; pa.pa_parsable = flags->parsable; pa.pa_estimate = B_FALSE; pa.pa_verbosity = flags->verbosity; + pa.pa_size = size; + pa.pa_astitle = flags->progressastitle; err = pthread_create(&ptid, NULL, send_progress_thread, &pa); @@ -2677,7 +2709,8 @@ zfs_send_one_cb_impl(zfs_handle_t *zhp, const char *from, int fd, err = lzc_send_redacted(name, from, fd, lzc_flags_from_sendflags(flags), redactbook); - if (flags->progress && send_progress_thread_exit(hdl, ptid)) + if ((flags->progress || flags->progressastitle) && + send_progress_thread_exit(hdl, ptid)) return (-1); if (err == 0 && (flags->props || flags->holds || flags->backup)) { @@ -3125,9 +3158,9 @@ guid_to_name_cb(zfs_handle_t *zhp, void *arg) return (EEXIST); } - err = zfs_iter_children(zhp, guid_to_name_cb, gtnd); + err = zfs_iter_children(zhp, 0, guid_to_name_cb, gtnd); if (err != EEXIST && gtnd->bookmark_ok) - err = zfs_iter_bookmarks(zhp, guid_to_name_cb, gtnd); + err = zfs_iter_bookmarks(zhp, 0, guid_to_name_cb, gtnd); zfs_close(zhp); return (err); } @@ -3181,9 +3214,10 @@ guid_to_name_redact_snaps(libzfs_handle_t *hdl, const char *parent, continue; int err = guid_to_name_cb(zfs_handle_dup(zhp), >nd); if (err != EEXIST) - err = zfs_iter_children(zhp, guid_to_name_cb, >nd); + err = zfs_iter_children(zhp, 0, guid_to_name_cb, >nd); if (err != EEXIST && bookmark_ok) - err = zfs_iter_bookmarks(zhp, guid_to_name_cb, >nd); + err = 
zfs_iter_bookmarks(zhp, 0, guid_to_name_cb, + >nd); zfs_close(zhp); if (err == EEXIST) return (0); @@ -4149,6 +4183,15 @@ zfs_setup_cmdline_props(libzfs_handle_t *hdl, zfs_type_t type, goto error; } + /* + * For plain replicated send, we can ignore encryption + * properties other than first stream + */ + if ((zfs_prop_encryption_key_param(prop) || prop == + ZFS_PROP_ENCRYPTION) && !newfs && recursive && !raw) { + continue; + } + /* incremental streams can only exclude encryption properties */ if ((zfs_prop_encryption_key_param(prop) || prop == ZFS_PROP_ENCRYPTION) && !newfs && @@ -4250,7 +4293,8 @@ zfs_setup_cmdline_props(libzfs_handle_t *hdl, zfs_type_t type, if (cp != NULL) *cp = '\0'; - if (!raw && zfs_crypto_create(hdl, namebuf, voprops, NULL, + if (!raw && !(!newfs && recursive) && + zfs_crypto_create(hdl, namebuf, voprops, NULL, B_FALSE, wkeydata_out, wkeylen_out) != 0) { fnvlist_free(voprops); ret = zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf); @@ -5073,14 +5117,14 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, *cp = '@'; break; case EINVAL: - if (flags->resumable) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "kernel modules must be upgraded to " - "receive this stream.")); - } else if (embedded && !raw) { + if (embedded && !raw) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "incompatible embedded data stream " "feature with encrypted receive.")); + } else if (flags->resumable) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "kernel modules must be upgraded to " + "receive this stream.")); } (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); break; @@ -5153,6 +5197,14 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, destsnap); *cp = '@'; break; + case E2BIG: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "zfs receive required kernel memory allocation " + "larger than the system can support. 
Please file " + "an issue at the OpenZFS issue tracker:\n" + "https://github.com/openzfs/zfs/issues/new")); + (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); + break; case EBUSY: if (hastoken) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_status.c b/sys/contrib/openzfs/lib/libzfs/libzfs_status.c index 6999d9afc5cd..27bb4476d706 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_status.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_status.c @@ -222,7 +222,6 @@ check_status(nvlist_t *config, boolean_t isimport, { pool_scan_stat_t *ps = NULL; uint_t vsc, psc; - uint64_t nerr; uint64_t suspended; uint64_t hostid = 0; uint64_t errata = 0; @@ -392,6 +391,7 @@ check_status(nvlist_t *config, boolean_t isimport, * Persistent data errors. */ if (!isimport) { + uint64_t nerr; if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT, &nerr) == 0 && nerr != 0) return (ZPOOL_STATUS_CORRUPT_DATA); diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_util.c b/sys/contrib/openzfs/lib/libzfs/libzfs_util.c index b4679dbb36fd..2507bfecdc9b 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_util.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_util.c @@ -1681,6 +1681,18 @@ zprop_parse_value(libzfs_handle_t *hdl, nvpair_t *elem, int prop, *ivalp = UINT64_MAX; } + /* + * Special handling for "checksum_*=none". In this case it's not + * 0 but UINT64_MAX. + */ + if ((type & ZFS_TYPE_VDEV) && isnone && + (prop == VDEV_PROP_CHECKSUM_N || + prop == VDEV_PROP_CHECKSUM_T || + prop == VDEV_PROP_IO_N || + prop == VDEV_PROP_IO_T)) { + *ivalp = UINT64_MAX; + } + /* * Special handling for setting 'refreservation' to 'auto'. Use * UINT64_MAX to tell the caller to use zfs_fix_auto_resv(). 
@@ -2010,15 +2022,20 @@ use_color(void) void color_start(const char *color) { - if (use_color()) + if (use_color()) { fputs(color, stdout); + fflush(stdout); + } } void color_end(void) { - if (use_color()) + if (use_color()) { fputs(ANSI_RESET, stdout); + fflush(stdout); + } + } /* printf() with a color. If color is NULL, then do a normal printf. */ diff --git a/sys/contrib/openzfs/lib/libzpool/kernel.c b/sys/contrib/openzfs/lib/libzpool/kernel.c index 0e3e4cee7baa..a9b9bf4c2ce5 100644 --- a/sys/contrib/openzfs/lib/libzpool/kernel.c +++ b/sys/contrib/openzfs/lib/libzpool/kernel.c @@ -770,10 +770,8 @@ random_get_pseudo_bytes(uint8_t *ptr, size_t len) int ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result) { - (void) nptr; - char *end; - - *result = strtoull(str, &end, base); + errno = 0; + *result = strtoull(str, nptr, base); if (*result == 0) return (errno); return (0); diff --git a/sys/contrib/openzfs/lib/libzutil/Makefile.am b/sys/contrib/openzfs/lib/libzutil/Makefile.am index ecdf940508b2..519906235f7f 100644 --- a/sys/contrib/openzfs/lib/libzutil/Makefile.am +++ b/sys/contrib/openzfs/lib/libzutil/Makefile.am @@ -17,6 +17,7 @@ libzutil_la_SOURCES = \ if BUILD_LINUX libzutil_la_SOURCES += \ + %D%/os/linux/zutil_setproctitle.c \ %D%/os/linux/zutil_device_path_os.c \ %D%/os/linux/zutil_import_os.c endif diff --git a/sys/contrib/openzfs/lib/libzutil/os/linux/zutil_setproctitle.c b/sys/contrib/openzfs/lib/libzutil/os/linux/zutil_setproctitle.c new file mode 100644 index 000000000000..4a6d12cf70cf --- /dev/null +++ b/sys/contrib/openzfs/lib/libzutil/os/linux/zutil_setproctitle.c @@ -0,0 +1,299 @@ +/* + * Copyright © 2013 Guillem Jover + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct { + /* Original value. */ + const char *arg0; + + /* Title space available. */ + char *base, *end; + + /* Pointer to original nul character within base. */ + char *nul; + + boolean_t warned; + boolean_t reset; + int error; +} SPT; + +#define LIBBSD_IS_PATHNAME_SEPARATOR(c) ((c) == '/') +#define SPT_MAXTITLE 255 + +extern const char *__progname; + +static const char * +getprogname(void) +{ + return (__progname); +} + +static void +setprogname(const char *progname) +{ + size_t i; + + for (i = strlen(progname); i > 0; i--) { + if (LIBBSD_IS_PATHNAME_SEPARATOR(progname[i - 1])) { + __progname = progname + i; + return; + } + } + __progname = progname; +} + + +static inline size_t +spt_min(size_t a, size_t b) +{ + return ((a < b) ? 
a : b); +} + +/* + * For discussion on the portability of the various methods, see + * https://lists.freebsd.org/pipermail/freebsd-stable/2008-June/043136.html + */ +static int +spt_clearenv(void) +{ + char **tmp; + + tmp = malloc(sizeof (*tmp)); + if (tmp == NULL) + return (errno); + + tmp[0] = NULL; + environ = tmp; + + return (0); +} + +static int +spt_copyenv(int envc, char *envp[]) +{ + char **envcopy; + char *eq; + int envsize; + int i, error; + + if (environ != envp) + return (0); + + /* + * Make a copy of the old environ array of pointers, in case + * clearenv() or setenv() is implemented to free the internal + * environ array, because we will need to access the old environ + * contents to make the new copy. + */ + envsize = (envc + 1) * sizeof (char *); + envcopy = malloc(envsize); + if (envcopy == NULL) + return (errno); + memcpy(envcopy, envp, envsize); + + error = spt_clearenv(); + if (error) { + environ = envp; + free(envcopy); + return (error); + } + + for (i = 0; envcopy[i]; i++) { + eq = strchr(envcopy[i], '='); + if (eq == NULL) + continue; + + *eq = '\0'; + if (setenv(envcopy[i], eq + 1, 1) < 0) + error = errno; + *eq = '='; + + if (error) { + environ = envp; + free(envcopy); + return (error); + } + } + + /* + * Dispose of the shallow copy, now that we've finished transfering + * the old environment. + */ + free(envcopy); + + return (0); +} + +static int +spt_copyargs(int argc, char *argv[]) +{ + char *tmp; + int i; + + for (i = 1; i < argc || (i >= argc && argv[i]); i++) { + if (argv[i] == NULL) + continue; + + tmp = strdup(argv[i]); + if (tmp == NULL) + return (errno); + + argv[i] = tmp; + } + + return (0); +} + +void +zfs_setproctitle_init(int argc, char *argv[], char *envp[]) +{ + char *base, *end, *nul, *tmp; + int i, envc, error; + + /* Try to make sure we got called with main() arguments. 
*/ + if (argc < 0) + return; + + base = argv[0]; + if (base == NULL) + return; + + nul = base + strlen(base); + end = nul + 1; + + for (i = 0; i < argc || (i >= argc && argv[i]); i++) { + if (argv[i] == NULL || argv[i] != end) + continue; + + end = argv[i] + strlen(argv[i]) + 1; + } + + for (i = 0; envp[i]; i++) { + if (envp[i] != end) + continue; + + end = envp[i] + strlen(envp[i]) + 1; + } + envc = i; + + SPT.arg0 = strdup(argv[0]); + if (SPT.arg0 == NULL) { + SPT.error = errno; + return; + } + + tmp = strdup(getprogname()); + if (tmp == NULL) { + SPT.error = errno; + return; + } + setprogname(tmp); + + error = spt_copyenv(envc, envp); + if (error) { + SPT.error = error; + return; + } + + error = spt_copyargs(argc, argv); + if (error) { + SPT.error = error; + return; + } + + SPT.nul = nul; + SPT.base = base; + SPT.end = end; +} + +void +zfs_setproctitle(const char *fmt, ...) +{ + /* Use buffer in case argv[0] is passed. */ + char buf[SPT_MAXTITLE + 1]; + va_list ap; + char *nul; + int len; + if (SPT.base == NULL) { + if (!SPT.warned) { + warnx("setproctitle not initialized, please" + "call zfs_setproctitle_init()"); + SPT.warned = B_TRUE; + } + return; + } + + if (fmt) { + if (fmt[0] == '-') { + /* Skip program name prefix. */ + fmt++; + len = 0; + } else { + /* Print program name heading for grep. 
*/ + snprintf(buf, sizeof (buf), "%s: ", getprogname()); + len = strlen(buf); + } + + va_start(ap, fmt); + len += vsnprintf(buf + len, sizeof (buf) - len, fmt, ap); + va_end(ap); + } else { + len = snprintf(buf, sizeof (buf), "%s", SPT.arg0); + } + + if (len <= 0) { + SPT.error = errno; + return; + } + + if (!SPT.reset) { + memset(SPT.base, 0, SPT.end - SPT.base); + SPT.reset = B_TRUE; + } else { + memset(SPT.base, 0, spt_min(sizeof (buf), SPT.end - SPT.base)); + } + + len = spt_min(len, spt_min(sizeof (buf), SPT.end - SPT.base) - 1); + memcpy(SPT.base, buf, len); + nul = SPT.base + len; + + if (nul < SPT.nul) { + *SPT.nul = '.'; + } else if (nul == SPT.nul && nul + 1 < SPT.end) { + *SPT.nul = ' '; + *++nul = '\0'; + } +} diff --git a/sys/contrib/openzfs/man/man1/arcstat.1 b/sys/contrib/openzfs/man/man1/arcstat.1 index 7c2caf698107..82358fa686b9 100644 --- a/sys/contrib/openzfs/man/man1/arcstat.1 +++ b/sys/contrib/openzfs/man/man1/arcstat.1 @@ -12,7 +12,7 @@ .\" Copyright (c) 2015 by Delphix. All rights reserved. .\" Copyright (c) 2020 by AJ Jordan. All rights reserved. .\" -.Dd May 26, 2021 +.Dd December 23, 2022 .Dt ARCSTAT 1 .Os . 
@@ -35,33 +35,83 @@ prints various ZFS ARC and L2ARC statistics in vmstat-like fashion: .It Sy c ARC target size .It Sy dh% -Demand data hit percentage +Demand hit percentage +.It Sy di% +Demand I/O hit percentage .It Sy dm% +Demand miss percentage +.It Sy ddh% +Demand data hit percentage +.It Sy ddi% +Demand data I/O hit percentage +.It Sy ddm% Demand data miss percentage +.It Sy dmh% +Demand metadata hit percentage +.It Sy dmi% +Demand metadata I/O hit percentage +.It Sy dmm% +Demand metadata miss percentage .It Sy mfu MFU list hits per second .It Sy mh% Metadata hit percentage +.It Sy mi% +Metadata I/O hit percentage .It Sy mm% Metadata miss percentage .It Sy mru MRU list hits per second .It Sy ph% Prefetch hits percentage +.It Sy pi% +Prefetch I/O hits percentage .It Sy pm% Prefetch miss percentage +.It Sy pdh% +Prefetch data hits percentage +.It Sy pdi% +Prefetch data I/O hits percentage +.It Sy pdm% +Prefetch data miss percentage +.It Sy pmh% +Prefetch metadata hits percentage +.It Sy pmi% +Prefetch metadata I/O hits percentage +.It Sy pmm% +Prefetch metadata miss percentage .It Sy dhit -Demand data hits per second +Demand hits per second +.It Sy dioh +Demand I/O hits per second .It Sy dmis +Demand misses per second +.It Sy ddhit +Demand data hits per second +.It Sy ddioh +Demand data I/O hits per second +.It Sy ddmis Demand data misses per second +.It Sy dmhit +Demand metadata hits per second +.It Sy dmioh +Demand metadata I/O hits per second +.It Sy dmmis +Demand metadata misses per second .It Sy hit% ARC hit percentage .It Sy hits -ARC reads per second +ARC hits per second +.It Sy ioh% +ARC I/O hits percentage +.It Sy iohs +ARC I/O hits per second .It Sy mfug MFU ghost list hits per second .It Sy mhit Metadata hits per second +.It Sy mioh +Metadata I/O hits per second .It Sy miss ARC misses per second .It Sy mmis @@ -70,8 +120,22 @@ Metadata misses per second MRU ghost list hits per second .It Sy phit Prefetch hits per second +.It Sy pioh +Prefetch I/O 
hits per second .It Sy pmis Prefetch misses per second +.It Sy pdhit +Prefetch data hits per second +.It Sy pdioh +Prefetch data I/O hits per second +.It Sy pdmis +Prefetch data misses per second +.It Sy pmhit +Prefetch metadata hits per second +.It Sy pmioh +Prefetch metadata I/O hits per second +.It Sy pmmis +Prefetch metadata misses per second .It Sy read Total ARC accesses per second .It Sy time @@ -81,8 +145,14 @@ ARC size .It Sy arcsz Alias for .Sy size +.It Sy unc +Uncached list hits per second .It Sy dread +Demand accesses per second +.It Sy ddread Demand data accesses per second +.It Sy dmread +Demand metadata accesses per second .It Sy eskip evict_skip per second .It Sy miss% @@ -91,6 +161,10 @@ ARC miss percentage Metadata accesses per second .It Sy pread Prefetch accesses per second +.It Sy pdread +Prefetch data accesses per second +.It Sy pmread +Prefetch metadata accesses per second .It Sy l2hit% L2ARC access hit percentage .It Sy l2hits diff --git a/sys/contrib/openzfs/man/man4/zfs.4 b/sys/contrib/openzfs/man/man4/zfs.4 index 8cef04cda990..e20d601340c6 100644 --- a/sys/contrib/openzfs/man/man4/zfs.4 +++ b/sys/contrib/openzfs/man/man4/zfs.4 @@ -15,7 +15,7 @@ .\" own identifying information: .\" Portions Copyright [yyyy] [name of copyright owner] .\" -.Dd November 9, 2022 +.Dd January 10, 2023 .Dt ZFS 4 .Os . @@ -239,6 +239,12 @@ relative to the pool. Make some blocks above a certain size be gang blocks. This option is used by the test suite to facilitate testing. . +.It Sy zfs_default_bs Ns = Ns Sy 9 Po 512 B Pc Pq int +Default dnode block size as a power of 2. +. +.It Sy zfs_default_ibs Ns = Ns Sy 17 Po 128 KiB Pc Pq int +Default dnode indirect block size as a power of 2. +. .It Sy zfs_history_output_max Ns = Ns Sy 1048576 Ns B Po 1 MiB Pc Pq u64 When attempting to log an output nvlist of an ioctl in the on-disk history, the output will not be stored if it is larger than this size (in bytes). 
@@ -496,6 +502,10 @@ prefetch the entire object (all leaf blocks). However, this is limited by .Sy dmu_prefetch_max . . +.It Sy zap_micro_max_size Ns = Ns Sy 131072 Ns B Po 128 KiB Pc Pq int +Maximum micro ZAP size. +A micro ZAP is upgraded to a fat ZAP, once it grows beyond the specified size. +. .It Sy zfetch_array_rd_sz Ns = Ns Sy 1048576 Ns B Po 1 MiB Pc Pq u64 If prefetching is enabled, disable prefetching for reads larger than this size. . @@ -1094,7 +1104,10 @@ This parameter takes precedence over .No See Sx ZFS TRANSACTION DELAY . .Pp Defaults to -.Sy physical_ram/4 , +.Sy min(physical_ram/4, 4GiB) , +or +.Sy min(physical_ram/4, 1GiB) +for 32-bit systems. . .It Sy zfs_dirty_data_max_max_percent Ns = Ns Sy 25 Ns % Pq uint Maximum allowable value of @@ -1391,7 +1404,7 @@ _ * 2 ZFS_DEBUG_DBUF_VERIFY Enable extra dbuf verifications. * 4 ZFS_DEBUG_DNODE_VERIFY Enable extra dnode verifications. 8 ZFS_DEBUG_SNAPNAMES Enable snapshot name verification. - 16 ZFS_DEBUG_MODIFY Check for illegally modified ARC buffers. +* 16 ZFS_DEBUG_MODIFY Check for illegally modified ARC buffers. 64 ZFS_DEBUG_ZIO_FREE Enable verification of block frees. 128 ZFS_DEBUG_HISTOGRAM_VERIFY Enable extra spacemap histogram verifications. 256 ZFS_DEBUG_METASLAB_VERIFY Verify space accounting on disk matches in-memory \fBrange_trees\fP. @@ -2196,6 +2209,13 @@ On very fragmented pools, lowering this .Pq typically to Sy 36 KiB can improve performance. . +.It Sy zil_min_commit_timeout Ns = Ns Sy 5000 Pq u64 +This sets the minimum delay in nanoseconds ZIL care to delay block commit, +waiting for more records. +If ZIL writes are too fast, kernel may not be able sleep for so short interval, +increasing log latency above allowed by +.Sy zfs_commit_timeout_pct . +. .It Sy zil_nocacheflush Ns = Ns Sy 0 Ns | Ns 1 Pq int Disable the cache flush commands that are normally sent to disk by the ZIL after an LWB write has completed. 
diff --git a/sys/contrib/openzfs/man/man7/vdevprops.7 b/sys/contrib/openzfs/man/man7/vdevprops.7 index af5d26f6b486..6eebfa0060de 100644 --- a/sys/contrib/openzfs/man/man7/vdevprops.7 +++ b/sys/contrib/openzfs/man/man7/vdevprops.7 @@ -43,7 +43,8 @@ section, below. .Ss Native Properties Every vdev has a set of properties that export statistics about the vdev as well as control various behaviors. -Properties are NOT inherited from top-level vdevs. +Properties are not inherited from top-level vdevs, with the exception of +checksum_n, checksum_t, io_n, and io_t. .Pp The values of numeric properties can be specified using human-readable suffixes .Po for example, @@ -114,9 +115,19 @@ The cumulative size of all operations of each type performed by this vdev If this device is currently being removed from the pool .El .Pp -The following native properties can be used to change the behavior of a ZFS -dataset. +The following native properties can be used to change the behavior of a vdev. .Bl -tag -width "allocating" +.It Sy checksum_n , checksum_t , io_n , io_t +Tune the fault management daemon by specifying checksum/io thresholds of +errors in seconds, respectively. +These properties can be set on leaf and top-level vdevs. +When the property is set on the leaf and top-level vdev, the value of the leaf +vdev will be used. +If the property is only set on the top-level vdev, this value will be used. +The value of these properties do not persist across vdev replacement. +For this reason, it is advisable to set the property on the top-level vdev - +not on the leaf vdev itself. +The default values are 10 errors in 600 seconds. .It Sy comment A text comment up to 8192 characters long .It Sy bootsize diff --git a/sys/contrib/openzfs/man/man8/zfs-send.8 b/sys/contrib/openzfs/man/man8/zfs-send.8 index 83f4e81da7b3..8cc6ae6ad59b 100644 --- a/sys/contrib/openzfs/man/man8/zfs-send.8 +++ b/sys/contrib/openzfs/man/man8/zfs-send.8 @@ -29,7 +29,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. 
.\" Copyright 2019 Joyent, Inc. .\" -.Dd March 16, 2022 +.Dd January 12, 2023 .Dt ZFS-SEND 8 .Os . @@ -39,29 +39,29 @@ .Sh SYNOPSIS .Nm zfs .Cm send -.Op Fl DLPbcehnpsvw +.Op Fl DLPVbcehnpsvw .Op Fl R Op Fl X Ar dataset Ns Oo , Ns Ar dataset Oc Ns … .Op Oo Fl I Ns | Ns Fl i Oc Ar snapshot .Ar snapshot .Nm zfs .Cm send -.Op Fl DLPcensvw +.Op Fl DLPVcensvw .Op Fl i Ar snapshot Ns | Ns Ar bookmark .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot .Nm zfs .Cm send .Fl -redact Ar redaction_bookmark -.Op Fl DLPcenpv +.Op Fl DLPVcenpv .Op Fl i Ar snapshot Ns | Ns Ar bookmark .Ar snapshot .Nm zfs .Cm send -.Op Fl Penv +.Op Fl PVenv .Fl t .Ar receive_resume_token .Nm zfs .Cm send -.Op Fl Pnv +.Op Fl PVnv .Fl S Ar filesystem .Nm zfs .Cm redact @@ -73,7 +73,7 @@ .It Xo .Nm zfs .Cm send -.Op Fl DLPbcehnpsvw +.Op Fl DLPVbcehnpsvw .Op Fl R Op Fl X Ar dataset Ns Oo , Ns Ar dataset Oc Ns … .Op Oo Fl I Ns | Ns Fl i Oc Ar snapshot .Ar snapshot @@ -142,6 +142,8 @@ If the flag is used to send encrypted datasets, then .Fl w must also be specified. +.It Fl V , -proctitle +Set the process title to a per-second report of how much data has been sent. .It Fl X , -exclude Ar dataset Ns Oo , Ns Ar dataset Oc Ns … With .Fl R , @@ -302,7 +304,7 @@ You will be able to receive your streams on future versions of ZFS. .It Xo .Nm zfs .Cm send -.Op Fl DLPcenvw +.Op Fl DLPVcenvw .Op Fl i Ar snapshot Ns | Ns Ar bookmark .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot .Xc @@ -436,7 +438,7 @@ This information includes a per-second report of how much data has been sent. .Nm zfs .Cm send .Fl -redact Ar redaction_bookmark -.Op Fl DLPcenpv +.Op Fl DLPVcenpv .Op Fl i Ar snapshot Ns | Ns Ar bookmark .Ar snapshot .Xc @@ -530,7 +532,7 @@ raw sends and redacted sends cannot be combined at this time. .It Xo .Nm zfs .Cm send -.Op Fl Penv +.Op Fl PVenv .Fl t .Ar receive_resume_token .Xc @@ -545,7 +547,7 @@ for more details. 
.It Xo .Nm zfs .Cm send -.Op Fl Pnv +.Op Fl PVnv .Op Fl i Ar snapshot Ns | Ns Ar bookmark .Fl S .Ar filesystem diff --git a/sys/contrib/openzfs/man/man8/zfs.8 b/sys/contrib/openzfs/man/man8/zfs.8 index 52c07925764c..d12377f9b4f2 100644 --- a/sys/contrib/openzfs/man/man8/zfs.8 +++ b/sys/contrib/openzfs/man/man8/zfs.8 @@ -737,6 +737,10 @@ command will be undone if the share is ever unshared (like via a reboot). . .Sh ENVIRONMENT VARIABLES .Bl -tag -width "ZFS_MODULE_TIMEOUT" +.It Sy ZFS_COLOR +Use ANSI color in +.Nm zfs Cm diff +output. .It Sy ZFS_MOUNT_HELPER Cause .Nm zfs Cm mount diff --git a/sys/contrib/openzfs/module/Kbuild.in b/sys/contrib/openzfs/module/Kbuild.in index a39f9d9d0500..a1ea08cd4348 100644 --- a/sys/contrib/openzfs/module/Kbuild.in +++ b/sys/contrib/openzfs/module/Kbuild.in @@ -151,10 +151,10 @@ zfs-$(CONFIG_PPC) += $(addprefix icp/,$(ICP_OBJS_PPC_PPC64)) zfs-$(CONFIG_PPC64) += $(addprefix icp/,$(ICP_OBJS_PPC_PPC64)) $(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64) \ - $(ICP_OBJS_ARM64) $(ICP_OBJS_PPC_PPC64)) : asflags-y += -I$(icp_include) + $(ICP_OBJS_ARM64) $(ICP_OBJS_PPC_PPC64)) : asflags-y += -I$(icp_include) -I$(zfs_include)/os/linux/spl -I$(zfs_include) $(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64) \ - $(ICP_OBJS_ARM64) $(ICP_OBJS_PPC_PPC64)) : ccflags-y += -I$(icp_include) + $(ICP_OBJS_ARM64) $(ICP_OBJS_PPC_PPC64)) : ccflags-y += -I$(icp_include) -I$(zfs_include)/os/linux/spl -I$(zfs_include) # Suppress objtool "return with modified stack frame" warnings. 
OBJECT_FILES_NON_STANDARD_aesni-gcm-x86_64.o := y diff --git a/sys/contrib/openzfs/module/icp/algs/aes/aes_impl.c b/sys/contrib/openzfs/module/icp/algs/aes/aes_impl.c index 9d90914aacfa..9daa975226fe 100644 --- a/sys/contrib/openzfs/module/icp/algs/aes/aes_impl.c +++ b/sys/contrib/openzfs/module/icp/algs/aes/aes_impl.c @@ -211,7 +211,7 @@ aes_alloc_keysched(size_t *size, int kmflag) { aes_key_t *keysched; - keysched = (aes_key_t *)kmem_alloc(sizeof (aes_key_t), kmflag); + keysched = kmem_alloc(sizeof (aes_key_t), kmflag); if (keysched != NULL) { *size = sizeof (aes_key_t); return (keysched); diff --git a/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_aesni.c b/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_aesni.c index 5bc1bf92dad4..61085214c77b 100644 --- a/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_aesni.c +++ b/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_aesni.c @@ -26,15 +26,16 @@ #include #include +#include /* These functions are used to execute AES-NI instructions: */ -extern int rijndael_key_setup_enc_intel(uint32_t rk[], +extern ASMABI int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[], uint64_t keyBits); -extern int rijndael_key_setup_dec_intel(uint32_t rk[], +extern ASMABI int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[], uint64_t keyBits); -extern void aes_encrypt_intel(const uint32_t rk[], int Nr, +extern ASMABI void aes_encrypt_intel(const uint32_t rk[], int Nr, const uint32_t pt[4], uint32_t ct[4]); -extern void aes_decrypt_intel(const uint32_t rk[], int Nr, +extern ASMABI void aes_decrypt_intel(const uint32_t rk[], int Nr, const uint32_t ct[4], uint32_t pt[4]); diff --git a/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.h b/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.h index eef74eaa9098..ecb51e3a3010 100644 --- a/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.h +++ b/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.h @@ -35,6 +35,7 @@ extern "C" { 
#include #include #include +#include /* * Methods used to define BLAKE3 assembler implementations diff --git a/sys/contrib/openzfs/module/icp/algs/blake3/blake3_x86-64.c b/sys/contrib/openzfs/module/icp/algs/blake3/blake3_x86-64.c index 84f8331ab37c..03e557edff4a 100644 --- a/sys/contrib/openzfs/module/icp/algs/blake3/blake3_x86-64.c +++ b/sys/contrib/openzfs/module/icp/algs/blake3/blake3_x86-64.c @@ -29,15 +29,15 @@ (defined(__x86_64) && defined(HAVE_SSE2)) || \ (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) -extern void zfs_blake3_compress_in_place_sse2(uint32_t cv[8], +extern void ASMABI zfs_blake3_compress_in_place_sse2(uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags); -extern void zfs_blake3_compress_xof_sse2(const uint32_t cv[8], +extern void ASMABI zfs_blake3_compress_xof_sse2(const uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags, uint8_t out[64]); -extern void zfs_blake3_hash_many_sse2(const uint8_t * const *inputs, +extern void ASMABI zfs_blake3_hash_many_sse2(const uint8_t * const *inputs, size_t num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter, boolean_t increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out); @@ -95,15 +95,15 @@ const blake3_ops_t blake3_sse2_impl = { (defined(__x86_64) && defined(HAVE_SSE2)) || \ (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) -extern void zfs_blake3_compress_in_place_sse41(uint32_t cv[8], +extern void ASMABI zfs_blake3_compress_in_place_sse41(uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags); -extern void zfs_blake3_compress_xof_sse41(const uint32_t cv[8], +extern void ASMABI zfs_blake3_compress_xof_sse41(const uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags, uint8_t out[64]); -extern void zfs_blake3_hash_many_sse41(const uint8_t * 
const *inputs, +extern void ASMABI zfs_blake3_hash_many_sse41(const uint8_t * const *inputs, size_t num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter, boolean_t increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out); @@ -162,7 +162,7 @@ const blake3_ops_t blake3_sse41_impl = { #endif #if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2) -extern void zfs_blake3_hash_many_avx2(const uint8_t * const *inputs, +extern void ASMABI zfs_blake3_hash_many_avx2(const uint8_t * const *inputs, size_t num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter, boolean_t increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out); @@ -194,15 +194,15 @@ const blake3_ops_t blake3_avx2_impl = { #endif #if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL) -extern void zfs_blake3_compress_in_place_avx512(uint32_t cv[8], +extern void ASMABI zfs_blake3_compress_in_place_avx512(uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags); -extern void zfs_blake3_compress_xof_avx512(const uint32_t cv[8], +extern void ASMABI zfs_blake3_compress_xof_avx512(const uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags, uint8_t out[64]); -extern void zfs_blake3_hash_many_avx512(const uint8_t * const *inputs, +extern void ASMABI zfs_blake3_hash_many_avx512(const uint8_t * const *inputs, size_t num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter, boolean_t increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out); diff --git a/sys/contrib/openzfs/module/icp/algs/modes/ccm.c b/sys/contrib/openzfs/module/icp/algs/modes/ccm.c index 4a8bb9bbc2c8..1371676d6e68 100644 --- a/sys/contrib/openzfs/module/icp/algs/modes/ccm.c +++ b/sys/contrib/openzfs/module/icp/algs/modes/ccm.c @@ -657,7 +657,7 @@ ccm_format_initial_blocks(uchar_t *nonce, ulong_t 
nonceSize, memset(&(b0[1+nonceSize]), 0, q); payloadSize = aes_ctx->ccm_data_len; - limit = 8 < q ? 8 : q; + limit = MIN(8, q); for (i = 0, j = 0, k = 15; i < limit; i++, j += 8, k--) { b0[k] = (uint8_t)((payloadSize >> j) & 0xFF); diff --git a/sys/contrib/openzfs/module/icp/algs/modes/gcm.c b/sys/contrib/openzfs/module/icp/algs/modes/gcm.c index 558a578090b2..472ec4bc9e13 100644 --- a/sys/contrib/openzfs/module/icp/algs/modes/gcm.c +++ b/sys/contrib/openzfs/module/icp/algs/modes/gcm.c @@ -59,7 +59,7 @@ boolean_t gcm_avx_can_use_movbe = B_FALSE; static boolean_t gcm_use_avx = B_FALSE; #define GCM_IMPL_USE_AVX (*(volatile boolean_t *)&gcm_use_avx) -extern boolean_t atomic_toggle_boolean_nv(volatile boolean_t *); +extern boolean_t ASMABI atomic_toggle_boolean_nv(volatile boolean_t *); static inline boolean_t gcm_avx_will_work(void); static inline void gcm_set_avx(boolean_t); @@ -653,7 +653,7 @@ gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, } gcm_ctx->gcm_htab_len = htab_len; gcm_ctx->gcm_Htable = - (uint64_t *)kmem_alloc(htab_len, KM_SLEEP); + kmem_alloc(htab_len, KM_SLEEP); if (gcm_ctx->gcm_Htable == NULL) { return (CRYPTO_HOST_MEMORY); @@ -728,7 +728,7 @@ gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, } gcm_ctx->gcm_htab_len = htab_len; gcm_ctx->gcm_Htable = - (uint64_t *)kmem_alloc(htab_len, KM_SLEEP); + kmem_alloc(htab_len, KM_SLEEP); if (gcm_ctx->gcm_Htable == NULL) { return (CRYPTO_HOST_MEMORY); @@ -1073,19 +1073,19 @@ MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation."); static uint32_t gcm_avx_chunk_size = ((32 * 1024) / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES; -extern void clear_fpu_regs_avx(void); -extern void gcm_xor_avx(const uint8_t *src, uint8_t *dst); -extern void aes_encrypt_intel(const uint32_t rk[], int nr, +extern void ASMABI clear_fpu_regs_avx(void); +extern void ASMABI gcm_xor_avx(const uint8_t *src, uint8_t *dst); +extern void ASMABI aes_encrypt_intel(const uint32_t rk[], int nr, const 
uint32_t pt[4], uint32_t ct[4]); -extern void gcm_init_htab_avx(uint64_t *Htable, const uint64_t H[2]); -extern void gcm_ghash_avx(uint64_t ghash[2], const uint64_t *Htable, +extern void ASMABI gcm_init_htab_avx(uint64_t *Htable, const uint64_t H[2]); +extern void ASMABI gcm_ghash_avx(uint64_t ghash[2], const uint64_t *Htable, const uint8_t *in, size_t len); -extern size_t aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t, +extern size_t ASMABI aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t, const void *, uint64_t *, uint64_t *); -extern size_t aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t, +extern size_t ASMABI aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t, const void *, uint64_t *, uint64_t *); static inline boolean_t diff --git a/sys/contrib/openzfs/module/icp/algs/modes/gcm_pclmulqdq.c b/sys/contrib/openzfs/module/icp/algs/modes/gcm_pclmulqdq.c index c2c8bc221203..737d2e47ecb7 100644 --- a/sys/contrib/openzfs/module/icp/algs/modes/gcm_pclmulqdq.c +++ b/sys/contrib/openzfs/module/icp/algs/modes/gcm_pclmulqdq.c @@ -26,9 +26,10 @@ #include #include +#include /* These functions are used to execute pclmulqdq based assembly methods */ -extern void gcm_mul_pclmulqdq(uint64_t *, uint64_t *, uint64_t *); +extern void ASMABI gcm_mul_pclmulqdq(uint64_t *, uint64_t *, uint64_t *); #include diff --git a/sys/contrib/openzfs/module/icp/algs/sha2/sha2.c b/sys/contrib/openzfs/module/icp/algs/sha2/sha2.c index 151432f1a5df..e6bbe34eaa57 100644 --- a/sys/contrib/openzfs/module/icp/algs/sha2/sha2.c +++ b/sys/contrib/openzfs/module/icp/algs/sha2/sha2.c @@ -48,6 +48,7 @@ #define HAVE_HTONL #endif #include /* for _ILP32 */ +#include static void Encode(uint8_t *, uint32_t *, size_t); static void Encode64(uint8_t *, uint64_t *, size_t); @@ -57,8 +58,8 @@ static void Encode64(uint8_t *, uint64_t *, size_t); #define SHA512Transform(ctx, in) SHA512TransformBlocks((ctx), (in), 1) #define SHA256Transform(ctx, in) SHA256TransformBlocks((ctx), (in), 1) -void 
SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num); -void SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num); +void ASMABI SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num); +void ASMABI SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num); #else static void SHA256Transform(SHA2_CTX *, const uint8_t *); diff --git a/sys/contrib/openzfs/module/icp/api/kcf_ctxops.c b/sys/contrib/openzfs/module/icp/api/kcf_ctxops.c index 4fa281676b81..b8cd67ea7f67 100644 --- a/sys/contrib/openzfs/module/icp/api/kcf_ctxops.c +++ b/sys/contrib/openzfs/module/icp/api/kcf_ctxops.c @@ -88,7 +88,7 @@ crypto_create_ctx_template(crypto_mechanism_t *mech, crypto_key_t *key, if (error != CRYPTO_SUCCESS) return (error); - if ((ctx_tmpl = (kcf_ctx_template_t *)kmem_alloc( + if ((ctx_tmpl = kmem_alloc( sizeof (kcf_ctx_template_t), KM_SLEEP)) == NULL) { KCF_PROV_REFRELE(pd); return (CRYPTO_HOST_MEMORY); diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_aesni.S b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_aesni.S index f622235bd15b..4f3fe3ec65d6 100644 --- a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_aesni.S +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_aesni.S @@ -378,7 +378,7 @@ rijndael_key_setup_enc_intel_local: FRAME_END RET -.align 4 +.balign 4 .Lenc_key192: cmp $192, %KEYSIZE32 jnz .Lenc_key128 @@ -415,7 +415,7 @@ rijndael_key_setup_enc_intel_local: FRAME_END RET -.align 4 +.balign 4 .Lenc_key128: cmp $128, %KEYSIZE32 jnz .Lenc_key_invalid_key_bits @@ -522,7 +522,7 @@ FRAME_BEGIN add %AESKEY, %ROUNDS64 mov %ROUNDS64, %ENDAESKEY -.align 4 +.balign 4 .Ldec_key_reorder_loop: movups (%AESKEY), %xmm0 movups (%ROUNDS64), %xmm1 @@ -533,7 +533,7 @@ FRAME_BEGIN cmp %AESKEY, %ROUNDS64 ja .Ldec_key_reorder_loop -.align 4 +.balign 4 .Ldec_key_inv_loop: movups (%rcx), %xmm0 // Convert an encryption round key to a form usable for decryption @@ -622,7 +622,7 @@ ENTRY_NP(aes_encrypt_intel) movups -0x50(%KEYP), %KEY 
aesenc %KEY, %STATE -.align 4 +.balign 4 .Lenc192: // AES 192 and 256 movups -0x40(%KEYP), %KEY @@ -630,7 +630,7 @@ ENTRY_NP(aes_encrypt_intel) movups -0x30(%KEYP), %KEY aesenc %KEY, %STATE -.align 4 +.balign 4 .Lenc128: // AES 128, 192, and 256 movups -0x20(%KEYP), %KEY @@ -705,7 +705,7 @@ ENTRY_NP(aes_decrypt_intel) movups -0x50(%KEYP), %KEY aesdec %KEY, %STATE -.align 4 +.balign 4 .Ldec192: // AES 192 and 256 movups -0x40(%KEYP), %KEY @@ -713,7 +713,7 @@ ENTRY_NP(aes_decrypt_intel) movups -0x30(%KEYP), %KEY aesdec %KEY, %STATE -.align 4 +.balign 4 .Ldec128: // AES 128, 192, and 256 movups -0x20(%KEYP), %KEY diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_amd64.S b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_amd64.S index a0525dd464f5..c4870a28ead6 100644 --- a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_amd64.S +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_amd64.S @@ -188,13 +188,13 @@ #include void aes_encrypt_amd64(const uint32_t rk[], int Nr, const uint32_t pt[4], - uint32_t ct[4]) { - (void) rk, (void) Nr, (void) pt, (void) ct; + uint32_t ct[4]) { + (void) rk, (void) Nr, (void) pt, (void) ct; } void aes_decrypt_amd64(const uint32_t rk[], int Nr, const uint32_t ct[4], - uint32_t pt[4]) { - (void) rk, (void) Nr, (void) pt, (void) ct; + uint32_t pt[4]) { + (void) rk, (void) Nr, (void) pt, (void) ct; } @@ -221,23 +221,23 @@ aes_decrypt_amd64(const uint32_t rk[], int Nr, const uint32_t ct[4], // finite field multiplies by {02}, {04} and {08} -#define f2(x) [[x<<1]^[[[x>>7]&1]*0x11b]] -#define f4(x) [[x<<2]^[[[x>>6]&1]*0x11b]^[[[x>>6]&2]*0x11b]] -#define f8(x) [[x<<3]^[[[x>>5]&1]*0x11b]^[[[x>>5]&2]*0x11b]^[[[x>>5]&4]*0x11b]] +#define f2(x) ((x<<1)^(((x>>7)&1)*0x11b)) +#define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b)) +#define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b)) // finite field multiplies required in table generation -#define f3(x) [[f2(x)] ^ [x]] -#define f9(x) [[f8(x)] ^ [x]] 
-#define fb(x) [[f8(x)] ^ [f2(x)] ^ [x]] -#define fd(x) [[f8(x)] ^ [f4(x)] ^ [x]] -#define fe(x) [[f8(x)] ^ [f4(x)] ^ [f2(x)]] +#define f3(x) ((f2(x)) ^ (x)) +#define f9(x) ((f8(x)) ^ (x)) +#define fb(x) ((f8(x)) ^ (f2(x)) ^ (x)) +#define fd(x) ((f8(x)) ^ (f4(x)) ^ (x)) +#define fe(x) ((f8(x)) ^ (f4(x)) ^ (f2(x))) // macros for expanding S-box data -#define u8(x) [f2(x)], [x], [x], [f3(x)], [f2(x)], [x], [x], [f3(x)] -#define v8(x) [fe(x)], [f9(x)], [fd(x)], [fb(x)], [fe(x)], [f9(x)], [fd(x)], [x] -#define w8(x) [x], 0, 0, 0, [x], 0, 0, 0 +#define u8(x) (f2(x)), (x), (x), (f3(x)), (f2(x)), (x), (x), (f3(x)) +#define v8(x) (fe(x)), (f9(x)), (fd(x)), (fb(x)), (fe(x)), (f9(x)), (fd(x)), (x) +#define w8(x) (x), 0, 0, 0, (x), 0, 0, 0 #define enc_vals(x) \ .byte x(0x63),x(0x7c),x(0x77),x(0x7b),x(0xf2),x(0x6b),x(0x6f),x(0xc5); \ @@ -693,8 +693,8 @@ aes_decrypt_amd64(const uint32_t rk[], int Nr, const uint32_t ct[4], * int aes_encrypt(const unsigned char *in, * unsigned char *out, const aes_encrypt_ctx cx[1])/ */ -.section .rodata -.align 64 +SECTION_STATIC +.balign 64 enc_tab: enc_vals(u8) #ifdef LAST_ROUND_TABLES @@ -718,7 +718,7 @@ ENTRY_NP(aes_encrypt_amd64) #else // OpenSolaris OS interface - sub $[4*8], %rsp // Make room on stack to save registers + sub $(4*8), %rsp // Make room on stack to save registers mov %rcx, (%rsp) // Save output pointer (P4) on stack mov %rdi, %r8 // context (P1) mov %rdx, %rdi // P3: save input pointer @@ -749,11 +749,11 @@ ENTRY_NP(aes_encrypt_amd64) lea (kptr,%rsi), kptr // Jump based on byte key length * 16: - cmp $[10*16], %esi + cmp $(10*16), %esi je 3f - cmp $[12*16], %esi + cmp $(12*16), %esi je 2f - cmp $[14*16], %esi + cmp $(14*16), %esi je 1f mov $-1, %rax // error jmp 4f @@ -785,7 +785,7 @@ ENTRY_NP(aes_encrypt_amd64) mov 1*8(%rsp), %rbx mov 2*8(%rsp), %rbp mov 3*8(%rsp), %r12 - add $[4*8], %rsp + add $(4*8), %rsp RET SET_SIZE(aes_encrypt_amd64) @@ -799,8 +799,8 @@ ENTRY_NP(aes_encrypt_amd64) * int aes_decrypt(const unsigned char 
*in, * unsigned char *out, const aes_encrypt_ctx cx[1])/ */ -.section .rodata -.align 64 +SECTION_STATIC +.balign 64 dec_tab: dec_vals(v8) #ifdef LAST_ROUND_TABLES @@ -824,7 +824,7 @@ ENTRY_NP(aes_decrypt_amd64) #else // OpenSolaris OS interface - sub $[4*8], %rsp // Make room on stack to save registers + sub $(4*8), %rsp // Make room on stack to save registers mov %rcx, (%rsp) // Save output pointer (P4) on stack mov %rdi, %r8 // context (P1) mov %rdx, %rdi // P3: save input pointer @@ -861,11 +861,11 @@ ENTRY_NP(aes_decrypt_amd64) xor rofs+12(%rdi), %edx // Jump based on byte key length * 16: - cmp $[10*16], %esi + cmp $(10*16), %esi je 3f - cmp $[12*16], %esi + cmp $(12*16), %esi je 2f - cmp $[14*16], %esi + cmp $(14*16), %esi je 1f mov $-1, %rax // error jmp 4f @@ -897,11 +897,11 @@ ENTRY_NP(aes_decrypt_amd64) mov 1*8(%rsp), %rbx mov 2*8(%rsp), %rbp mov 3*8(%rsp), %r12 - add $[4*8], %rsp + add $(4*8), %rsp RET SET_SIZE(aes_decrypt_amd64) -#endif /* lint || __lint */ +#endif /* lint || __lint */ #ifdef __ELF__ .section .note.GNU-stack,"",%progbits diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S index cb08430b81ed..8f9e766486f1 100644 --- a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S @@ -31,12 +31,9 @@ #include .intel_syntax noprefix -.global zfs_blake3_hash_many_avx2 .text -.type zfs_blake3_hash_many_avx2,@function -.p2align 6 -zfs_blake3_hash_many_avx2: +ENTRY_ALIGN(zfs_blake3_hash_many_avx2, 64) ENDBR push r15 push r14 @@ -1791,13 +1788,10 @@ zfs_blake3_hash_many_avx2: vmovdqu xmmword ptr [rbx+0x10], xmm1 jmp 4b -.size zfs_blake3_hash_many_avx2, . 
- zfs_blake3_hash_many_avx2 +SET_SIZE(zfs_blake3_hash_many_avx2) -#ifdef __APPLE__ -.static_data -#else +SECTION_STATIC .section .rodata -#endif .p2align 6 ADD0: diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx512.S b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx512.S index 960406ea2c01..39830f1556bb 100644 --- a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx512.S +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx512.S @@ -31,17 +31,9 @@ #include .intel_syntax noprefix -.global zfs_blake3_hash_many_avx512 -.global zfs_blake3_compress_in_place_avx512 -.global zfs_blake3_compress_xof_avx512 .text -.type zfs_blake3_hash_many_avx512,@function -.type zfs_blake3_compress_xof_avx512,@function -.type zfs_blake3_compress_in_place_avx512,@function - -.p2align 6 -zfs_blake3_hash_many_avx512: +ENTRY_ALIGN(zfs_blake3_hash_many_avx512, 64) ENDBR push r15 push r14 @@ -2397,8 +2389,9 @@ zfs_blake3_hash_many_avx512: vmovdqu xmmword ptr [rbx], xmm0 vmovdqu xmmword ptr [rbx+0x10], xmm1 jmp 4b -.p2align 6 -zfs_blake3_compress_in_place_avx512: +SET_SIZE(zfs_blake3_hash_many_avx512) + +ENTRY_ALIGN(zfs_blake3_compress_in_place_avx512, 64) ENDBR vmovdqu xmm0, xmmword ptr [rdi] vmovdqu xmm1, xmmword ptr [rdi+0x10] @@ -2478,9 +2471,9 @@ zfs_blake3_compress_in_place_avx512: vmovdqu xmmword ptr [rdi], xmm0 vmovdqu xmmword ptr [rdi+0x10], xmm1 RET +SET_SIZE(zfs_blake3_compress_in_place_avx512) -.p2align 6 -zfs_blake3_compress_xof_avx512: +ENTRY_ALIGN(zfs_blake3_compress_xof_avx512, 64) ENDBR vmovdqu xmm0, xmmword ptr [rdi] vmovdqu xmm1, xmmword ptr [rdi+0x10] @@ -2564,16 +2557,9 @@ zfs_blake3_compress_xof_avx512: vmovdqu xmmword ptr [r9+0x20], xmm2 vmovdqu xmmword ptr [r9+0x30], xmm3 RET +SET_SIZE(zfs_blake3_compress_xof_avx512) -.size zfs_blake3_hash_many_avx512, . - zfs_blake3_hash_many_avx512 -.size zfs_blake3_compress_in_place_avx512, . - zfs_blake3_compress_in_place_avx512 -.size zfs_blake3_compress_xof_avx512, . 
- zfs_blake3_compress_xof_avx512 - -#ifdef __APPLE__ -.static_data -#else -.section .rodata -#endif +SECTION_STATIC .p2align 6 INDEX0: diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse2.S b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse2.S index c4290aaa8faf..78c4ffac53a8 100644 --- a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse2.S +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse2.S @@ -31,17 +31,10 @@ #include .intel_syntax noprefix -.global zfs_blake3_hash_many_sse2 -.global zfs_blake3_compress_in_place_sse2 -.global zfs_blake3_compress_xof_sse2 -.text -.type zfs_blake3_hash_many_sse2,@function -.type zfs_blake3_compress_in_place_sse2,@function -.type zfs_blake3_compress_xof_sse2,@function +SECTION_TEXT - .p2align 6 -zfs_blake3_hash_many_sse2: +ENTRY_ALIGN(zfs_blake3_hash_many_sse2, 64) ENDBR push r15 push r14 @@ -2037,9 +2030,9 @@ zfs_blake3_hash_many_sse2: movups xmmword ptr [rbx], xmm0 movups xmmword ptr [rbx+0x10], xmm1 jmp 4b +SET_SIZE(zfs_blake3_hash_many_sse2) -.p2align 6 -zfs_blake3_compress_in_place_sse2: +ENTRY_ALIGN(zfs_blake3_compress_in_place_sse2, 64) ENDBR movups xmm0, xmmword ptr [rdi] movups xmm1, xmmword ptr [rdi+0x10] @@ -2148,9 +2141,9 @@ zfs_blake3_compress_in_place_sse2: movups xmmword ptr [rdi], xmm0 movups xmmword ptr [rdi+0x10], xmm1 RET +SET_SIZE(zfs_blake3_compress_in_place_sse2) -.p2align 6 -zfs_blake3_compress_xof_sse2: +ENTRY_ALIGN(zfs_blake3_compress_xof_sse2, 64) ENDBR movups xmm0, xmmword ptr [rdi] movups xmm1, xmmword ptr [rdi+0x10] @@ -2267,21 +2260,14 @@ zfs_blake3_compress_xof_sse2: movups xmmword ptr [r9+0x20], xmm2 movups xmmword ptr [r9+0x30], xmm3 RET +SET_SIZE(zfs_blake3_compress_xof_sse2) -.size zfs_blake3_hash_many_sse2, . - zfs_blake3_hash_many_sse2 -.size zfs_blake3_compress_in_place_sse2, . - zfs_blake3_compress_in_place_sse2 -.size zfs_blake3_compress_xof_sse2, . 
- zfs_blake3_compress_xof_sse2 - -#ifdef __APPLE__ -.static_data -#else -.section .rodata -#endif +SECTION_STATIC .p2align 6 BLAKE3_IV: .long 0x6A09E667, 0xBB67AE85 .long 0x3C6EF372, 0xA54FF53A -ADD0: +ADD0: .long 0, 1, 2, 3 ADD1: .long 4, 4, 4, 4 diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse41.S b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse41.S index 45b90cc9ed89..8ee7be75a0e1 100644 --- a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse41.S +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse41.S @@ -31,17 +31,10 @@ #include .intel_syntax noprefix -.global zfs_blake3_compress_in_place_sse41 -.global zfs_blake3_compress_xof_sse41 -.global zfs_blake3_hash_many_sse41 .text -.type zfs_blake3_hash_many_sse41,@function -.type zfs_blake3_compress_in_place_sse41,@function -.type zfs_blake3_compress_xof_sse41,@function -.p2align 6 -zfs_blake3_hash_many_sse41: +ENTRY_ALIGN(zfs_blake3_hash_many_sse41, 64) ENDBR push r15 push r14 @@ -1800,8 +1793,9 @@ zfs_blake3_hash_many_sse41: movups xmmword ptr [rbx], xmm0 movups xmmword ptr [rbx+0x10], xmm1 jmp 4b -.p2align 6 -zfs_blake3_compress_in_place_sse41: +SET_SIZE(zfs_blake3_hash_many_sse41) + +ENTRY_ALIGN(zfs_blake3_compress_in_place_sse41, 64) ENDBR movups xmm0, xmmword ptr [rdi] movups xmm1, xmmword ptr [rdi+0x10] @@ -1899,8 +1893,9 @@ zfs_blake3_compress_in_place_sse41: movups xmmword ptr [rdi], xmm0 movups xmmword ptr [rdi+0x10], xmm1 RET -.p2align 6 -zfs_blake3_compress_xof_sse41: +SET_SIZE(zfs_blake3_compress_in_place_sse41) + +ENTRY_ALIGN(zfs_blake3_compress_xof_sse41, 64) ENDBR movups xmm0, xmmword ptr [rdi] movups xmm1, xmmword ptr [rdi+0x10] @@ -2006,16 +2001,10 @@ zfs_blake3_compress_xof_sse41: movups xmmword ptr [r9+0x20], xmm2 movups xmmword ptr [r9+0x30], xmm3 RET +SET_SIZE(zfs_blake3_compress_xof_sse41) -.size zfs_blake3_hash_many_sse41, . - zfs_blake3_hash_many_sse41 -.size zfs_blake3_compress_in_place_sse41, . 
- zfs_blake3_compress_in_place_sse41 -.size zfs_blake3_compress_xof_sse41, . - zfs_blake3_compress_xof_sse41 +SECTION_STATIC -#ifdef __APPLE__ -.static_data -#else -.section .rodata -#endif .p2align 6 BLAKE3_IV: .long 0x6A09E667, 0xBB67AE85 @@ -2024,7 +2013,7 @@ ROT16: .byte 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13 ROT8: .byte 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12 -ADD0: +ADD0: .long 0, 1, 2, 3 ADD1: .long 4, 4, 4, 4 diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S index cf17b3768712..165492a0ed76 100644 --- a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S @@ -50,14 +50,16 @@ #define _ASM #include +/* Windows userland links with OpenSSL */ +#if !defined (_WIN32) || defined (_KERNEL) + .extern gcm_avx_can_use_movbe .text #ifdef HAVE_MOVBE -.type _aesni_ctr32_ghash_6x,@function -.align 32 -_aesni_ctr32_ghash_6x: +.balign 32 +FUNCTION(_aesni_ctr32_ghash_6x) .cfi_startproc ENDBR vmovdqu 32(%r11),%xmm2 @@ -73,7 +75,7 @@ _aesni_ctr32_ghash_6x: vmovdqu %xmm4,16+8(%rsp) jmp .Loop6x -.align 32 +.balign 32 .Loop6x: addl $100663296,%ebx jc .Lhandle_ctr32 @@ -285,7 +287,7 @@ _aesni_ctr32_ghash_6x: vmovups 224-128(%rcx),%xmm1 jmp .Lenc_tail -.align 32 +.balign 32 .Lhandle_ctr32: vmovdqu (%r11),%xmm0 vpshufb %xmm0,%xmm1,%xmm6 @@ -307,7 +309,7 @@ _aesni_ctr32_ghash_6x: vpshufb %xmm0,%xmm1,%xmm1 jmp .Lresume_ctr32 -.align 32 +.balign 32 .Lenc_tail: vaesenc %xmm15,%xmm9,%xmm9 vmovdqu %xmm7,16+8(%rsp) @@ -369,12 +371,11 @@ _aesni_ctr32_ghash_6x: RET .cfi_endproc -.size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x +SET_SIZE(_aesni_ctr32_ghash_6x) #endif /* ifdef HAVE_MOVBE */ -.type _aesni_ctr32_ghash_no_movbe_6x,@function -.align 32 -_aesni_ctr32_ghash_no_movbe_6x: +.balign 32 +FUNCTION(_aesni_ctr32_ghash_no_movbe_6x) .cfi_startproc ENDBR vmovdqu 32(%r11),%xmm2 @@ 
-390,7 +391,7 @@ _aesni_ctr32_ghash_no_movbe_6x: vmovdqu %xmm4,16+8(%rsp) jmp .Loop6x_nmb -.align 32 +.balign 32 .Loop6x_nmb: addl $100663296,%ebx jc .Lhandle_ctr32_nmb @@ -614,7 +615,7 @@ _aesni_ctr32_ghash_no_movbe_6x: vmovups 224-128(%rcx),%xmm1 jmp .Lenc_tail_nmb -.align 32 +.balign 32 .Lhandle_ctr32_nmb: vmovdqu (%r11),%xmm0 vpshufb %xmm0,%xmm1,%xmm6 @@ -636,7 +637,7 @@ _aesni_ctr32_ghash_no_movbe_6x: vpshufb %xmm0,%xmm1,%xmm1 jmp .Lresume_ctr32_nmb -.align 32 +.balign 32 .Lenc_tail_nmb: vaesenc %xmm15,%xmm9,%xmm9 vmovdqu %xmm7,16+8(%rsp) @@ -698,12 +699,9 @@ _aesni_ctr32_ghash_no_movbe_6x: RET .cfi_endproc -.size _aesni_ctr32_ghash_no_movbe_6x,.-_aesni_ctr32_ghash_no_movbe_6x +SET_SIZE(_aesni_ctr32_ghash_no_movbe_6x) -.globl aesni_gcm_decrypt -.type aesni_gcm_decrypt,@function -.align 32 -aesni_gcm_decrypt: +ENTRY_ALIGN(aesni_gcm_decrypt, 32) .cfi_startproc ENDBR xorq %r10,%r10 @@ -818,10 +816,10 @@ aesni_gcm_decrypt: movq %r10,%rax RET .cfi_endproc -.size aesni_gcm_decrypt,.-aesni_gcm_decrypt -.type _aesni_ctr32_6x,@function -.align 32 -_aesni_ctr32_6x: +SET_SIZE(aesni_gcm_decrypt) + +.balign 32 +FUNCTION(_aesni_ctr32_6x) .cfi_startproc ENDBR vmovdqu 0-128(%rcx),%xmm4 @@ -845,7 +843,7 @@ _aesni_ctr32_6x: vpxor %xmm4,%xmm14,%xmm14 jmp .Loop_ctr32 -.align 16 +.balign 16 .Loop_ctr32: vaesenc %xmm15,%xmm9,%xmm9 vaesenc %xmm15,%xmm10,%xmm10 @@ -888,7 +886,7 @@ _aesni_ctr32_6x: leaq 96(%rsi),%rsi RET -.align 32 +.balign 32 .Lhandle_ctr32_2: vpshufb %xmm0,%xmm1,%xmm6 vmovdqu 48(%r11),%xmm5 @@ -911,12 +909,9 @@ _aesni_ctr32_6x: vpxor %xmm4,%xmm14,%xmm14 jmp .Loop_ctr32 .cfi_endproc -.size _aesni_ctr32_6x,.-_aesni_ctr32_6x +SET_SIZE(_aesni_ctr32_6x) -.globl aesni_gcm_encrypt -.type aesni_gcm_encrypt,@function -.align 32 -aesni_gcm_encrypt: +ENTRY_ALIGN(aesni_gcm_encrypt, 32) .cfi_startproc ENDBR xorq %r10,%r10 @@ -1196,7 +1191,9 @@ aesni_gcm_encrypt: movq %r10,%rax RET .cfi_endproc -.size aesni_gcm_encrypt,.-aesni_gcm_encrypt +SET_SIZE(aesni_gcm_encrypt) + +#endif /* 
!_WIN32 || _KERNEL */ /* Some utility routines */ @@ -1204,13 +1201,10 @@ aesni_gcm_encrypt: * clear all fpu registers * void clear_fpu_regs_avx(void); */ -.globl clear_fpu_regs_avx -.type clear_fpu_regs_avx,@function -.align 32 -clear_fpu_regs_avx: +ENTRY_ALIGN(clear_fpu_regs_avx, 32) vzeroall RET -.size clear_fpu_regs_avx,.-clear_fpu_regs_avx +SET_SIZE(clear_fpu_regs_avx) /* * void gcm_xor_avx(const uint8_t *src, uint8_t *dst); @@ -1219,25 +1213,19 @@ clear_fpu_regs_avx: * stores the result at `dst'. The XOR is performed using FPU registers, * so make sure FPU state is saved when running this in the kernel. */ -.globl gcm_xor_avx -.type gcm_xor_avx,@function -.align 32 -gcm_xor_avx: +ENTRY_ALIGN(gcm_xor_avx, 32) movdqu (%rdi), %xmm0 movdqu (%rsi), %xmm1 pxor %xmm1, %xmm0 movdqu %xmm0, (%rsi) RET -.size gcm_xor_avx,.-gcm_xor_avx +SET_SIZE(gcm_xor_avx) /* * Toggle a boolean_t value atomically and return the new value. * boolean_t atomic_toggle_boolean_nv(volatile boolean_t *); */ -.globl atomic_toggle_boolean_nv -.type atomic_toggle_boolean_nv,@function -.align 32 -atomic_toggle_boolean_nv: +ENTRY_ALIGN(atomic_toggle_boolean_nv, 32) xorl %eax, %eax lock xorl $1, (%rdi) @@ -1245,10 +1233,11 @@ atomic_toggle_boolean_nv: movl $1, %eax 1: RET -.size atomic_toggle_boolean_nv,.-atomic_toggle_boolean_nv +SET_SIZE(atomic_toggle_boolean_nv) -.pushsection .rodata -.align 64 +SECTION_STATIC + +.balign 64 .Lbswap_mask: .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 .Lpoly: @@ -1260,8 +1249,7 @@ atomic_toggle_boolean_nv: .Lone_lsb: .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 .byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 -.align 64 -.popsection +.balign 64 /* Mark the stack non-executable. 
*/ #if defined(__linux__) && defined(__ELF__) diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S index eb9514e10cda..e40b3df32753 100644 --- a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S @@ -102,7 +102,7 @@ gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res) { // static uint8_t byte_swap16_mask[] = { // 15, 14, 13, 12, 11, 10, 9, 8, 7, 6 ,5, 4, 3, 2, 1, 0 }; .section .rodata -.align XMM_ALIGN +.balign XMM_ALIGN .Lbyte_swap16_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/ghash-x86_64.S b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/ghash-x86_64.S index bf3724a23eae..f62e056d4b64 100644 --- a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/ghash-x86_64.S +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/ghash-x86_64.S @@ -102,12 +102,13 @@ .text -.globl gcm_gmult_clmul -.type gcm_gmult_clmul,@function -.align 16 -gcm_gmult_clmul: +/* Windows userland links with OpenSSL */ +#if !defined (_WIN32) || defined (_KERNEL) +ENTRY_ALIGN(gcm_gmult_clmul, 16) + .cfi_startproc ENDBR + .L_gmult_clmul: movdqu (%rdi),%xmm0 movdqa .Lbswap_mask(%rip),%xmm5 @@ -155,12 +156,10 @@ gcm_gmult_clmul: movdqu %xmm0,(%rdi) RET .cfi_endproc -.size gcm_gmult_clmul,.-gcm_gmult_clmul +SET_SIZE(gcm_gmult_clmul) +#endif /* !_WIN32 || _KERNEL */ -.globl gcm_init_htab_avx -.type gcm_init_htab_avx,@function -.align 32 -gcm_init_htab_avx: +ENTRY_ALIGN(gcm_init_htab_avx, 32) .cfi_startproc ENDBR vzeroupper @@ -189,7 +188,7 @@ gcm_init_htab_avx: vpxor %xmm2,%xmm6,%xmm6 movq $4,%r10 jmp .Linit_start_avx -.align 32 +.balign 32 .Linit_loop_avx: vpalignr $8,%xmm3,%xmm4,%xmm5 vmovdqu %xmm5,-16(%rdi) @@ -269,21 +268,17 @@ gcm_init_htab_avx: vzeroupper RET .cfi_endproc -.size gcm_init_htab_avx,.-gcm_init_htab_avx 
+SET_SIZE(gcm_init_htab_avx) -.globl gcm_gmult_avx -.type gcm_gmult_avx,@function -.align 32 -gcm_gmult_avx: +#if !defined (_WIN32) || defined (_KERNEL) +ENTRY_ALIGN(gcm_gmult_avx, 32) .cfi_startproc ENDBR jmp .L_gmult_clmul .cfi_endproc -.size gcm_gmult_avx,.-gcm_gmult_avx -.globl gcm_ghash_avx -.type gcm_ghash_avx,@function -.align 32 -gcm_ghash_avx: +SET_SIZE(gcm_gmult_avx) + +ENTRY_ALIGN(gcm_ghash_avx, 32) .cfi_startproc ENDBR vzeroupper @@ -391,7 +386,7 @@ gcm_ghash_avx: subq $0x80,%rcx jmp .Loop8x_avx -.align 32 +.balign 32 .Loop8x_avx: vpunpckhqdq %xmm15,%xmm15,%xmm8 vmovdqu 112(%rdx),%xmm14 @@ -511,7 +506,7 @@ gcm_ghash_avx: addq $0x80,%rcx jmp .Ltail_no_xor_avx -.align 32 +.balign 32 .Lshort_avx: vmovdqu -16(%rdx,%rcx,1),%xmm14 leaq (%rdx,%rcx,1),%rdx @@ -615,7 +610,7 @@ gcm_ghash_avx: subq $0x10,%rcx jmp .Ltail_avx -.align 32 +.balign 32 .Ltail_avx: vpxor %xmm10,%xmm15,%xmm15 .Ltail_no_xor_avx: @@ -658,10 +653,12 @@ gcm_ghash_avx: vzeroupper RET .cfi_endproc -.size gcm_ghash_avx,.-gcm_ghash_avx +SET_SIZE(gcm_ghash_avx) -.pushsection .rodata -.align 64 +#endif /* !_WIN32 || _KERNEL */ + +SECTION_STATIC +.balign 64 .Lbswap_mask: .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 .L0x1c2_polynomial: @@ -670,14 +667,14 @@ gcm_ghash_avx: .long 7,0,7,0 .L7_mask_poly: .long 7,0,450,0 -.align 64 -.type .Lrem_4bit,@object +.balign 64 +SET_OBJ(.Lrem_4bit) .Lrem_4bit: .long 0,0,0,471859200,0,943718400,0,610271232 .long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 .long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 .long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 -.type .Lrem_8bit,@object +SET_OBJ(.Lrem_8bit) .Lrem_8bit: .value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E .value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E @@ -713,8 +710,7 @@ gcm_ghash_avx: .value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE .byte 
71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 -.align 64 -.popsection +.balign 64 /* Mark the stack non-executable. */ #if defined(__linux__) && defined(__ELF__) diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S index 60d34b4a3be0..f3d701528459 100644 --- a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S @@ -133,7 +133,7 @@ ENTRY_NP(SHA256TransformBlocks) mov 4*7(%rdi),%r11d jmp .Lloop -.align 16 +.balign 16 .Lloop: xor %rdi,%rdi mov 4*0(%rsi),%r12d @@ -873,7 +873,7 @@ ENTRY_NP(SHA256TransformBlocks) add %r14d,%eax # h+=Maj(a,b,c) jmp .Lrounds_16_xx -.align 16 +.balign 16 .Lrounds_16_xx: mov 4(%rsp),%r13d mov 56(%rsp),%r12d @@ -2064,8 +2064,8 @@ ENTRY_NP(SHA256TransformBlocks) SET_SIZE(SHA256TransformBlocks) .section .rodata -.align 64 -.type K256,@object +.balign 64 +SET_OBJ(K256) K256: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S index ed7fb362a1ac..520f5b6dab24 100644 --- a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S @@ -134,7 +134,7 @@ ENTRY_NP(SHA512TransformBlocks) mov 8*7(%rdi),%r11 jmp .Lloop -.align 16 +.balign 16 .Lloop: xor %rdi,%rdi mov 8*0(%rsi),%r12 @@ -874,7 +874,7 @@ ENTRY_NP(SHA512TransformBlocks) add %r14,%rax # h+=Maj(a,b,c) jmp .Lrounds_16_xx -.align 16 +.balign 16 .Lrounds_16_xx: mov 8(%rsp),%r13 mov 112(%rsp),%r12 @@ -2065,8 +2065,8 @@ ENTRY_NP(SHA512TransformBlocks) SET_SIZE(SHA512TransformBlocks) .section .rodata -.align 64 -.type K512,@object +.balign 64 +SET_OBJ(K512) K512: .quad 
0x428a2f98d728ae22,0x7137449123ef65cd .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc @@ -2110,6 +2110,7 @@ K512: .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 #endif /* !lint && !__lint */ -#ifdef __ELF__ +#if defined(__ELF__) .section .note.GNU-stack,"",%progbits #endif + diff --git a/sys/contrib/openzfs/module/icp/core/kcf_callprov.c b/sys/contrib/openzfs/module/icp/core/kcf_callprov.c index f06b3cd00bcf..b1822dd5b878 100644 --- a/sys/contrib/openzfs/module/icp/core/kcf_callprov.c +++ b/sys/contrib/openzfs/module/icp/core/kcf_callprov.c @@ -63,7 +63,7 @@ is_in_triedlist(kcf_provider_desc_t *pd, kcf_prov_tried_t *triedl) if (triedl->pt_pd == pd) return (B_TRUE); triedl = triedl->pt_next; - }; + } return (B_FALSE); } diff --git a/sys/contrib/openzfs/module/icp/include/aes/aes_impl.h b/sys/contrib/openzfs/module/icp/include/aes/aes_impl.h index fe5c23974682..66eb4a6c8fb6 100644 --- a/sys/contrib/openzfs/module/icp/include/aes/aes_impl.h +++ b/sys/contrib/openzfs/module/icp/include/aes/aes_impl.h @@ -36,6 +36,7 @@ extern "C" { #include #include +#include /* Similar to sysmacros.h IS_P2ALIGNED, but checks two pointers: */ #define IS_P2ALIGNED2(v, w, a) \ @@ -190,13 +191,13 @@ extern const aes_impl_ops_t aes_generic_impl; extern const aes_impl_ops_t aes_x86_64_impl; /* These functions are used to execute amd64 instructions for AMD or Intel: */ -extern int rijndael_key_setup_enc_amd64(uint32_t rk[], +extern ASMABI int rijndael_key_setup_enc_amd64(uint32_t rk[], const uint32_t cipherKey[], int keyBits); -extern int rijndael_key_setup_dec_amd64(uint32_t rk[], +extern ASMABI int rijndael_key_setup_dec_amd64(uint32_t rk[], const uint32_t cipherKey[], int keyBits); -extern void aes_encrypt_amd64(const uint32_t rk[], int Nr, +extern ASMABI void aes_encrypt_amd64(const uint32_t rk[], int Nr, const uint32_t pt[4], uint32_t ct[4]); -extern void aes_decrypt_amd64(const uint32_t rk[], int Nr, +extern ASMABI void aes_decrypt_amd64(const uint32_t rk[], int Nr, const uint32_t ct[4], 
uint32_t pt[4]); #endif #if defined(__x86_64) && defined(HAVE_AES) diff --git a/sys/contrib/openzfs/module/icp/include/sys/ia32/stack.h b/sys/contrib/openzfs/module/icp/include/sys/ia32/stack.h deleted file mode 100644 index 9ed327b343a6..000000000000 --- a/sys/contrib/openzfs/module/icp/include/sys/ia32/stack.h +++ /dev/null @@ -1,160 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or https://opensource.org/licenses/CDDL-1.0. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _IA32_SYS_STACK_H -#define _IA32_SYS_STACK_H - -#if !defined(_ASM) - -#include - -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * In the x86 world, a stack frame looks like this: - * - * |--------------------------| - * 4n+8(%ebp) ->| argument word n | - * | ... | (Previous frame) - * 8(%ebp) ->| argument word 0 | - * |--------------------------|-------------------- - * 4(%ebp) ->| return address | - * |--------------------------| - * 0(%ebp) ->| previous %ebp (optional) | - * |--------------------------| - * -4(%ebp) ->| unspecified | (Current frame) - * | ... 
| - * 0(%esp) ->| variable size | - * |--------------------------| - */ - -/* - * Stack alignment macros. - */ - -#define STACK_ALIGN32 4 -#define STACK_ENTRY_ALIGN32 4 -#define STACK_BIAS32 0 -#define SA32(x) (((x)+(STACK_ALIGN32-1)) & ~(STACK_ALIGN32-1)) -#define STACK_RESERVE32 0 -#define MINFRAME32 0 - -#if defined(__amd64) - -/* - * In the amd64 world, a stack frame looks like this: - * - * |--------------------------| - * 8n+16(%rbp)->| argument word n | - * | ... | (Previous frame) - * 16(%rbp) ->| argument word 0 | - * |--------------------------|-------------------- - * 8(%rbp) ->| return address | - * |--------------------------| - * 0(%rbp) ->| previous %rbp | - * |--------------------------| - * -8(%rbp) ->| unspecified | (Current frame) - * | ... | - * 0(%rsp) ->| variable size | - * |--------------------------| - * -128(%rsp) ->| reserved for function | - * |--------------------------| - * - * The end of the input argument area must be aligned on a 16-byte - * boundary; i.e. (%rsp - 8) % 16 == 0 at function entry. - * - * The 128-byte location beyond %rsp is considered to be reserved for - * functions and is NOT modified by signal handlers. It can be used - * to store temporary data that is not needed across function calls. - */ - -/* - * Stack alignment macros. 
- */ - -#define STACK_ALIGN64 16 -#define STACK_ENTRY_ALIGN64 8 -#define STACK_BIAS64 0 -#define SA64(x) (((x)+(STACK_ALIGN64-1)) & ~(STACK_ALIGN64-1)) -#define STACK_RESERVE64 128 -#define MINFRAME64 0 - -#define STACK_ALIGN STACK_ALIGN64 -#define STACK_ENTRY_ALIGN STACK_ENTRY_ALIGN64 -#define STACK_BIAS STACK_BIAS64 -#define SA(x) SA64(x) -#define STACK_RESERVE STACK_RESERVE64 -#define MINFRAME MINFRAME64 - -#elif defined(__i386) - -#define STACK_ALIGN STACK_ALIGN32 -#define STACK_ENTRY_ALIGN STACK_ENTRY_ALIGN32 -#define STACK_BIAS STACK_BIAS32 -#define SA(x) SA32(x) -#define STACK_RESERVE STACK_RESERVE32 -#define MINFRAME MINFRAME32 - -#endif /* __i386 */ - -#if defined(_KERNEL) && !defined(_ASM) - -#if defined(ZFS_DEBUG) -#if STACK_ALIGN == 4 -#define ASSERT_STACK_ALIGNED() \ - { \ - uint32_t __tmp; \ - ASSERT((((uintptr_t)&__tmp) & (STACK_ALIGN - 1)) == 0); \ - } -#elif (STACK_ALIGN == 16) && (_LONG_DOUBLE_ALIGNMENT == 16) -#define ASSERT_STACK_ALIGNED() \ - { \ - long double __tmp; \ - ASSERT((((uintptr_t)&__tmp) & (STACK_ALIGN - 1)) == 0); \ - } -#endif -#else /* DEBUG */ -#define ASSERT_STACK_ALIGNED() -#endif /* DEBUG */ - -struct regs; - -void traceregs(struct regs *); -void traceback(caddr_t); - -#endif /* defined(_KERNEL) && !defined(_ASM) */ - -#define STACK_GROWTH_DOWN /* stacks grow from high to low addresses */ - -#ifdef __cplusplus -} -#endif - -#endif /* _IA32_SYS_STACK_H */ diff --git a/sys/contrib/openzfs/module/icp/include/sys/ia32/trap.h b/sys/contrib/openzfs/module/icp/include/sys/ia32/trap.h deleted file mode 100644 index 3d74266326b2..000000000000 --- a/sys/contrib/openzfs/module/icp/include/sys/ia32/trap.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. 
- * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or https://opensource.org/licenses/CDDL-1.0. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ -/* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */ -/* All Rights Reserved */ - -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _IA32_SYS_TRAP_H -#define _IA32_SYS_TRAP_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Trap type values - */ - -#define T_ZERODIV 0x0 /* #de divide by 0 error */ -#define T_SGLSTP 0x1 /* #db single step */ -#define T_NMIFLT 0x2 /* NMI */ -#define T_BPTFLT 0x3 /* #bp breakpoint fault, INT3 insn */ -#define T_OVFLW 0x4 /* #of INTO overflow fault */ -#define T_BOUNDFLT 0x5 /* #br BOUND insn fault */ -#define T_ILLINST 0x6 /* #ud invalid opcode fault */ -#define T_NOEXTFLT 0x7 /* #nm device not available: x87 */ -#define T_DBLFLT 0x8 /* #df double fault */ -#define T_EXTOVRFLT 0x9 /* [not generated: 386 only] */ -#define T_TSSFLT 0xa /* #ts invalid TSS fault */ -#define T_SEGFLT 0xb /* #np segment not present fault */ -#define T_STKFLT 0xc /* #ss stack fault */ -#define T_GPFLT 0xd /* #gp general protection fault */ -#define T_PGFLT 0xe /* #pf page fault */ -#define T_EXTERRFLT 0x10 /* #mf x87 FPU error fault */ -#define T_ALIGNMENT 0x11 /* #ac alignment check error */ -#define T_MCE 0x12 /* #mc machine check exception */ -#define T_SIMDFPE 0x13 /* #xm SSE/SSE exception */ -#define 
T_DBGENTR 0x14 /* debugger entry */ -#define T_ENDPERR 0x21 /* emulated extension error flt */ -#define T_ENOEXTFLT 0x20 /* emulated ext not present */ -#define T_FASTTRAP 0xd2 /* fast system call */ -#define T_SYSCALLINT 0x91 /* general system call */ -#define T_DTRACE_RET 0x7f /* DTrace pid return */ -#define T_INT80 0x80 /* int80 handler for linux emulation */ -#define T_SOFTINT 0x50fd /* pseudo softint trap type */ - -/* - * Pseudo traps. - */ -#define T_INTERRUPT 0x100 -#define T_FAULT 0x200 -#define T_AST 0x400 -#define T_SYSCALL 0x180 - - -/* - * Values of error code on stack in case of page fault - */ - -#define PF_ERR_MASK 0x01 /* Mask for error bit */ -#define PF_ERR_PAGE 0x00 /* page not present */ -#define PF_ERR_PROT 0x01 /* protection error */ -#define PF_ERR_WRITE 0x02 /* fault caused by write (else read) */ -#define PF_ERR_USER 0x04 /* processor was in user mode */ - /* (else supervisor) */ -#define PF_ERR_EXEC 0x10 /* attempt to execute a No eXec page (AMD) */ - -/* - * Definitions for fast system call subfunctions - */ -#define T_FNULL 0 /* Null trap for testing */ -#define T_FGETFP 1 /* Get emulated FP context */ -#define T_FSETFP 2 /* Set emulated FP context */ -#define T_GETHRTIME 3 /* Get high resolution time */ -#define T_GETHRVTIME 4 /* Get high resolution virtual time */ -#define T_GETHRESTIME 5 /* Get high resolution time */ -#define T_GETLGRP 6 /* Get home lgrpid */ - -#define T_LASTFAST 6 /* Last valid subfunction */ - -#ifdef __cplusplus -} -#endif - -#endif /* _IA32_SYS_TRAP_H */ diff --git a/sys/contrib/openzfs/module/lua/ldo.c b/sys/contrib/openzfs/module/lua/ldo.c index 291bca044e7b..bf525588e260 100644 --- a/sys/contrib/openzfs/module/lua/ldo.c +++ b/sys/contrib/openzfs/module/lua/ldo.c @@ -9,6 +9,7 @@ #define LUA_CORE #include +#include #include "lapi.h" #include "ldebug.h" @@ -27,7 +28,6 @@ #include "lzio.h" - /* Return the number of bytes available on the stack. 
*/ #if defined (_KERNEL) && defined(__linux__) #include @@ -90,8 +90,8 @@ static intptr_t stack_remaining(void) { typedef struct _label_t { long long unsigned val[JMP_BUF_CNT]; } label_t; -int setjmp(label_t *) __attribute__ ((__nothrow__)); -extern __attribute__((noreturn)) void longjmp(label_t *); +int ASMABI setjmp(label_t *) __attribute__ ((__nothrow__)); +extern __attribute__((noreturn)) void ASMABI longjmp(label_t *); #define LUAI_THROW(L,c) longjmp(&(c)->b) #define LUAI_TRY(L,c,a) if (setjmp(&(c)->b) == 0) { a } diff --git a/sys/contrib/openzfs/module/lua/lfunc.h b/sys/contrib/openzfs/module/lua/lfunc.h index ca0d3a3e0b03..1dc6995ca9d7 100644 --- a/sys/contrib/openzfs/module/lua/lfunc.h +++ b/sys/contrib/openzfs/module/lua/lfunc.h @@ -12,10 +12,10 @@ #define sizeCclosure(n) (cast(int, sizeof(CClosure)) + \ - cast(int, sizeof(TValue)*((n)-1))) + cast(int, sizeof(TValue)*((n)))) #define sizeLclosure(n) (cast(int, sizeof(LClosure)) + \ - cast(int, sizeof(TValue *)*((n)-1))) + cast(int, sizeof(TValue *)*((n)))) LUAI_FUNC Proto *luaF_newproto (lua_State *L); diff --git a/sys/contrib/openzfs/module/lua/lgc.c b/sys/contrib/openzfs/module/lua/lgc.c index 0ec18ea4839f..ccb8c019b94a 100644 --- a/sys/contrib/openzfs/module/lua/lgc.c +++ b/sys/contrib/openzfs/module/lua/lgc.c @@ -1056,7 +1056,7 @@ static lu_mem singlestep (lua_State *L) { lu_mem work; int sw; g->gcstate = GCSatomic; /* finish mark phase */ - g->GCestimate = g->GCmemtrav; /* save what was counted */; + g->GCestimate = g->GCmemtrav; /* save what was counted */ work = atomic(L); /* add what was traversed by 'atomic' */ g->GCestimate += work; /* estimate of total memory traversed */ sw = entersweep(L); diff --git a/sys/contrib/openzfs/module/lua/lgc.h b/sys/contrib/openzfs/module/lua/lgc.h index 84bb1cdf99fa..02f17fe1239e 100644 --- a/sys/contrib/openzfs/module/lua/lgc.h +++ b/sys/contrib/openzfs/module/lua/lgc.h @@ -120,7 +120,7 @@ #define luaC_condGC(L,c) \ - {if (G(L)->GCdebt > 0) {c;}; 
condchangemem(L);} + {if (G(L)->GCdebt > 0) {c;} condchangemem(L);} #define luaC_checkGC(L) luaC_condGC(L, luaC_step(L);) diff --git a/sys/contrib/openzfs/module/lua/lobject.h b/sys/contrib/openzfs/module/lua/lobject.h index d29d0068c7e6..b7c6b41ac7f4 100644 --- a/sys/contrib/openzfs/module/lua/lobject.h +++ b/sys/contrib/openzfs/module/lua/lobject.h @@ -513,14 +513,14 @@ typedef struct UpVal { typedef struct CClosure { ClosureHeader; lua_CFunction f; - TValue upvalue[1]; /* list of upvalues */ + TValue upvalue[]; /* list of upvalues */ } CClosure; typedef struct LClosure { ClosureHeader; struct Proto *p; - UpVal *upvals[1]; /* list of upvalues */ + UpVal *upvals[]; /* list of upvalues */ } LClosure; diff --git a/sys/contrib/openzfs/module/lua/lvm.c b/sys/contrib/openzfs/module/lua/lvm.c index b5545732535c..53b9884f0a71 100644 --- a/sys/contrib/openzfs/module/lua/lvm.c +++ b/sys/contrib/openzfs/module/lua/lvm.c @@ -568,7 +568,7 @@ void luaV_finishOp (lua_State *L) { #define donextjump(ci) { i = *ci->u.l.savedpc; dojump(ci, i, 1); } -#define Protect(x) { {x;}; base = ci->u.l.base; } +#define Protect(x) { {x;} base = ci->u.l.base; } #define checkGC(L,c) \ Protect( luaC_condGC(L,{L->top = (c); /* limit of live values */ \ diff --git a/sys/contrib/openzfs/module/lua/setjmp/setjmp_aarch64.S b/sys/contrib/openzfs/module/lua/setjmp/setjmp_aarch64.S index a5a9a85fd57e..040ef1821ab0 100644 --- a/sys/contrib/openzfs/module/lua/setjmp/setjmp_aarch64.S +++ b/sys/contrib/openzfs/module/lua/setjmp/setjmp_aarch64.S @@ -35,7 +35,7 @@ #define ENTRY(sym) \ .text; \ .globl sym; \ - .align 2; \ + .balign 2; \ .type sym,#function; \ sym: diff --git a/sys/contrib/openzfs/module/lua/setjmp/setjmp_arm.S b/sys/contrib/openzfs/module/lua/setjmp/setjmp_arm.S index 78bc3e0b347d..0b18a96282cf 100644 --- a/sys/contrib/openzfs/module/lua/setjmp/setjmp_arm.S +++ b/sys/contrib/openzfs/module/lua/setjmp/setjmp_arm.S @@ -40,7 +40,7 @@ #define ENTRY(x) \ .text; \ .syntax unified; \ - .align 2; \ + 
.balign 2; \ .global x; \ .type x,#function; \ _FUNC_MODE; \ diff --git a/sys/contrib/openzfs/module/lua/setjmp/setjmp_i386.S b/sys/contrib/openzfs/module/lua/setjmp/setjmp_i386.S index 0d0adfc351ca..87f9cb08c292 100644 --- a/sys/contrib/openzfs/module/lua/setjmp/setjmp_i386.S +++ b/sys/contrib/openzfs/module/lua/setjmp/setjmp_i386.S @@ -25,7 +25,7 @@ #define ENTRY(x) \ .text; \ - .align 8; \ + .balign 8; \ .globl x; \ .type x, @function; \ x: diff --git a/sys/contrib/openzfs/module/lua/setjmp/setjmp_ppc.S b/sys/contrib/openzfs/module/lua/setjmp/setjmp_ppc.S index 72aa5d5ab5b0..a035cd11b33b 100644 --- a/sys/contrib/openzfs/module/lua/setjmp/setjmp_ppc.S +++ b/sys/contrib/openzfs/module/lua/setjmp/setjmp_ppc.S @@ -54,7 +54,7 @@ #ifdef PPC64_ELF_ABI_v2 #define ENTRY(name) \ - .align 2 ; \ + .balign 2 ; \ .type name,@function; \ .weak name; \ name: @@ -64,7 +64,7 @@ name: #define XGLUE(a,b) a##b #define GLUE(a,b) XGLUE(a,b) #define ENTRY(name) \ - .align 2 ; \ + .balign 2 ; \ .weak name; \ .weak GLUE(.,name); \ .pushsection ".opd","aw"; \ diff --git a/sys/contrib/openzfs/module/lua/setjmp/setjmp_sparc64.S b/sys/contrib/openzfs/module/lua/setjmp/setjmp_sparc64.S index a37a71cbce33..e1099643de92 100644 --- a/sys/contrib/openzfs/module/lua/setjmp/setjmp_sparc64.S +++ b/sys/contrib/openzfs/module/lua/setjmp/setjmp_sparc64.S @@ -50,7 +50,7 @@ #define ENTRY(x) \ .text ; \ - .align 32 ; \ + .balign 32 ; \ .globl x ; \ .type x,@function ; \ x: diff --git a/sys/contrib/openzfs/module/lua/setjmp/setjmp_x86_64.S b/sys/contrib/openzfs/module/lua/setjmp/setjmp_x86_64.S index 7e13fea05dda..337fceb15b00 100644 --- a/sys/contrib/openzfs/module/lua/setjmp/setjmp_x86_64.S +++ b/sys/contrib/openzfs/module/lua/setjmp/setjmp_x86_64.S @@ -27,28 +27,16 @@ #include #endif -#ifndef RET -#define RET ret -#endif - -#undef ENTRY -#define ENTRY(x) \ - .text; \ - .align 8; \ - .globl x; \ - .type x, @function; \ -x: - -#define SET_SIZE(x) \ - .size x, [.-x] - /* * Setjmp and longjmp implement 
non-local gotos using state vectors * type label_t. */ #ifdef __x86_64__ - ENTRY(setjmp) +#define _ASM +#include + +ENTRY_ALIGN(setjmp, 8) movq %rsp, 0(%rdi) movq %rbp, 8(%rdi) movq %rbx, 16(%rdi) @@ -62,7 +50,7 @@ x: RET SET_SIZE(setjmp) - ENTRY(longjmp) +ENTRY_ALIGN(longjmp, 8) movq 0(%rdi), %rsp movq 8(%rdi), %rbp movq 16(%rdi), %rbx diff --git a/sys/contrib/openzfs/module/os/freebsd/spl/callb.c b/sys/contrib/openzfs/module/os/freebsd/spl/callb.c index 47f3ccc0c7fa..850f37ddf5a8 100644 --- a/sys/contrib/openzfs/module/os/freebsd/spl/callb.c +++ b/sys/contrib/openzfs/module/os/freebsd/spl/callb.c @@ -146,7 +146,7 @@ callb_add_common(boolean_t (*func)(void *arg, int code), cv_wait(&ct->ct_busy_cv, &ct->ct_lock); if ((cp = ct->ct_freelist) == NULL) { ct->ct_ncallb++; - cp = (callb_t *)kmem_zalloc(sizeof (callb_t), KM_SLEEP); + cp = kmem_zalloc(sizeof (callb_t), KM_SLEEP); } ct->ct_freelist = cp->c_next; cp->c_thread = t; @@ -263,7 +263,7 @@ callb_execute_class(int class, int code) mutex_enter(&ct->ct_lock); for (cp = ct->ct_first_cb[class]; - cp != NULL && ret == 0; cp = cp->c_next) { + cp != NULL && ret == NULL; cp = cp->c_next) { while (cp->c_flag & CALLB_EXECUTING) cv_wait(&cp->c_done_cv, &ct->ct_lock); /* diff --git a/sys/contrib/openzfs/module/os/freebsd/spl/spl_kstat.c b/sys/contrib/openzfs/module/os/freebsd/spl/spl_kstat.c index 059ada235c4a..9f5f92e194ec 100644 --- a/sys/contrib/openzfs/module/os/freebsd/spl/spl_kstat.c +++ b/sys/contrib/openzfs/module/os/freebsd/spl/spl_kstat.c @@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include static MALLOC_DEFINE(M_KSTAT, "kstat_data", "Kernel statistics"); @@ -134,6 +135,55 @@ kstat_sysctl_string(SYSCTL_HANDLER_ARGS) return (sysctl_handle_string(oidp, val, len, req)); } +static int +kstat_sysctl_dataset(SYSCTL_HANDLER_ARGS) +{ + kstat_t *ksp = arg1; + kstat_named_t *ksent; + kstat_named_t *ksent_ds; + uint64_t val; + char *ds_name; + uint32_t ds_len = 0; + + ksent_ds = ksent = ksp->ks_data; + 
ds_name = KSTAT_NAMED_STR_PTR(ksent_ds); + ds_len = KSTAT_NAMED_STR_BUFLEN(ksent_ds); + ds_name[ds_len-1] = '\0'; + + if (!zone_dataset_visible(ds_name, NULL)) { + return (EPERM); + } + + /* Select the correct element */ + ksent += arg2; + /* Update the aggsums before reading */ + (void) ksp->ks_update(ksp, KSTAT_READ); + val = ksent->value.ui64; + + return (sysctl_handle_64(oidp, &val, 0, req)); +} + +static int +kstat_sysctl_dataset_string(SYSCTL_HANDLER_ARGS) +{ + kstat_t *ksp = arg1; + kstat_named_t *ksent = ksp->ks_data; + char *val; + uint32_t len = 0; + + /* Select the correct element */ + ksent += arg2; + val = KSTAT_NAMED_STR_PTR(ksent); + len = KSTAT_NAMED_STR_BUFLEN(ksent); + val[len-1] = '\0'; + + if (!zone_dataset_visible(val, NULL)) { + return (EPERM); + } + + return (sysctl_handle_string(oidp, val, len, req)); +} + static int kstat_sysctl_io(SYSCTL_HANDLER_ARGS) { @@ -422,11 +472,20 @@ kstat_install_named(kstat_t *ksp) ksp, i, kstat_sysctl, "Q", namelast); break; case KSTAT_DATA_UINT64: - SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx, - SYSCTL_CHILDREN(ksp->ks_sysctl_root), - OID_AUTO, namelast, - CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE, - ksp, i, kstat_sysctl, "QU", namelast); + if (strcmp(ksp->ks_class, "dataset") == 0) { + SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx, + SYSCTL_CHILDREN(ksp->ks_sysctl_root), + OID_AUTO, namelast, + CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE, + ksp, i, kstat_sysctl_dataset, "QU", + namelast); + } else { + SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx, + SYSCTL_CHILDREN(ksp->ks_sysctl_root), + OID_AUTO, namelast, + CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE, + ksp, i, kstat_sysctl, "QU", namelast); + } break; case KSTAT_DATA_LONG: SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx, @@ -443,11 +502,21 @@ kstat_install_named(kstat_t *ksp) ksp, i, kstat_sysctl, "LU", namelast); break; case KSTAT_DATA_STRING: - SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx, - SYSCTL_CHILDREN(ksp->ks_sysctl_root), - OID_AUTO, namelast, - CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, - ksp, 
i, kstat_sysctl_string, "A", namelast); + if (strcmp(ksp->ks_class, "dataset") == 0) { + SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx, + SYSCTL_CHILDREN(ksp->ks_sysctl_root), + OID_AUTO, namelast, CTLTYPE_STRING | + CTLFLAG_RD | CTLFLAG_MPSAFE, + ksp, i, kstat_sysctl_dataset_string, "A", + namelast); + } else { + SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx, + SYSCTL_CHILDREN(ksp->ks_sysctl_root), + OID_AUTO, namelast, CTLTYPE_STRING | + CTLFLAG_RD | CTLFLAG_MPSAFE, + ksp, i, kstat_sysctl_string, "A", + namelast); + } break; default: panic("unsupported type: %d", typelast); diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c b/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c index bb3cbc39ec75..e4c6cf7d097d 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c @@ -133,18 +133,15 @@ zfsdev_ioctl(struct cdev *dev, ulong_t zcmd, caddr_t arg, int flag, len = IOCPARM_LEN(zcmd); vecnum = zcmd & 0xff; zp = (void *)arg; - uaddr = (void *)(uintptr_t)zp->zfs_cmd; error = 0; #ifdef ZFS_LEGACY_SUPPORT zcl = NULL; #endif - if (len != sizeof (zfs_iocparm_t)) { - printf("len %d vecnum: %d sizeof (zfs_cmd_t) %ju\n", - len, vecnum, (uintmax_t)sizeof (zfs_cmd_t)); + if (len != sizeof (zfs_iocparm_t)) return (EINVAL); - } + uaddr = (void *)(uintptr_t)zp->zfs_cmd; zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); #ifdef ZFS_LEGACY_SUPPORT /* diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/spa_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/spa_os.c index 45ea10bb487d..449c1624817e 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/spa_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/spa_os.c @@ -94,6 +94,8 @@ spa_generate_rootconf(const char *name) for (i = 0; i < count; i++) { uint64_t txg; + if (configs[i] == NULL) + continue; txg = fnvlist_lookup_uint64(configs[i], ZPOOL_CONFIG_POOL_TXG); if (txg > best_txg) { best_txg = txg; diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c 
b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c index 48af1eaf8ea7..bd6cfc86ce2a 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c @@ -366,10 +366,10 @@ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_size, CTLFLAG_RD, &ARC_anon.arcs_size.rc_count, 0, "size of anonymous state"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_metadata_esize, CTLFLAG_RD, &ARC_anon.arcs_esize[ARC_BUFC_METADATA].rc_count, 0, - "size of anonymous state"); + "size of metadata in anonymous state"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_data_esize, CTLFLAG_RD, &ARC_anon.arcs_esize[ARC_BUFC_DATA].rc_count, 0, - "size of anonymous state"); + "size of data in anonymous state"); /* END CSTYLED */ extern arc_state_t ARC_mru; @@ -424,6 +424,19 @@ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_data_esize, CTLFLAG_RD, "size of data in mfu ghost state"); /* END CSTYLED */ +extern arc_state_t ARC_uncached; + +/* BEGIN CSTYLED */ +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, uncached_size, CTLFLAG_RD, + &ARC_uncached.arcs_size.rc_count, 0, "size of uncached state"); +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, uncached_metadata_esize, CTLFLAG_RD, + &ARC_uncached.arcs_esize[ARC_BUFC_METADATA].rc_count, 0, + "size of metadata in uncached state"); +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, uncached_data_esize, CTLFLAG_RD, + &ARC_uncached.arcs_esize[ARC_BUFC_DATA].rc_count, 0, + "size of data in uncached state"); +/* END CSTYLED */ + extern arc_state_t ARC_l2c_only; /* BEGIN CSTYLED */ @@ -459,20 +472,6 @@ SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_idistance, /* dnode.c */ -extern int zfs_default_bs; - -/* BEGIN CSTYLED */ -SYSCTL_INT(_vfs_zfs, OID_AUTO, default_bs, CTLFLAG_RWTUN, - &zfs_default_bs, 0, "Default dnode block shift"); -/* END CSTYLED */ - -extern int zfs_default_ibs; - -/* BEGIN CSTYLED */ -SYSCTL_INT(_vfs_zfs, OID_AUTO, default_ibs, CTLFLAG_RWTUN, - &zfs_default_ibs, 0, "Default dnode indirect block shift"); -/* END CSTYLED */ - /* dsl_scan.c */ /* metaslab.c 
*/ diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c index 64906b3bad63..9f735dbb558c 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c @@ -2041,8 +2041,7 @@ zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode) { if ((v4_mode & WRITE_MASK) && (zp->z_zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) && - (!IS_DEVVP(ZTOV(zp)) || - (IS_DEVVP(ZTOV(zp)) && (v4_mode & WRITE_MASK_ATTRS)))) { + (!IS_DEVVP(ZTOV(zp)) || (v4_mode & WRITE_MASK_ATTRS))) { return (SET_ERROR(EROFS)); } @@ -2415,7 +2414,6 @@ zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr, * read_acl/read_attributes */ - error = 0; ASSERT3U(working_mode, !=, 0); if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES) && diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_debug.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_debug.c index abb3c0033194..78d50c6fd8b7 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_debug.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_debug.c @@ -30,7 +30,7 @@ typedef struct zfs_dbgmsg { list_node_t zdm_node; time_t zdm_timestamp; uint_t zdm_size; - char zdm_msg[1]; /* variable length allocation */ + char zdm_msg[]; } zfs_dbgmsg_t; static list_t zfs_dbgmsgs; @@ -159,7 +159,7 @@ __zfs_dbgmsg(char *buf) DTRACE_PROBE1(zfs__dbgmsg, char *, buf); - size = sizeof (zfs_dbgmsg_t) + strlen(buf); + size = sizeof (zfs_dbgmsg_t) + strlen(buf) + 1; zdm = kmem_zalloc(size, KM_SLEEP); zdm->zdm_size = size; zdm->zdm_timestamp = gethrestime_sec(); diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_dir.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_dir.c index 07232086d52b..948df8e50de1 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_dir.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_dir.c @@ -426,6 +426,7 @@ zfs_rmnode(znode_t *zp) zfsvfs_t *zfsvfs = zp->z_zfsvfs; objset_t *os = 
zfsvfs->z_os; dmu_tx_t *tx; + uint64_t z_id = zp->z_id; uint64_t acl_obj; uint64_t xattr_obj; uint64_t count; @@ -445,8 +446,10 @@ zfs_rmnode(znode_t *zp) * Not enough space to delete some xattrs. * Leave it in the unlinked set. */ + ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id); zfs_znode_dmu_fini(zp); zfs_znode_free(zp); + ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); return; } } else { @@ -464,8 +467,10 @@ zfs_rmnode(znode_t *zp) * Not enough space or we were interrupted by unmount. * Leave the file in the unlinked set. */ + ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id); zfs_znode_dmu_fini(zp); zfs_znode_free(zp); + ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); return; } } @@ -501,8 +506,10 @@ zfs_rmnode(znode_t *zp) * which point we'll call zfs_unlinked_drain() to process it). */ dmu_tx_abort(tx); + ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id); zfs_znode_dmu_fini(zp); zfs_znode_free(zp); + ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); return; } diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c index 1064ea5cf01d..76ae09f811eb 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c @@ -386,7 +386,6 @@ void zfs_znode_dmu_fini(znode_t *zp) { ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) || - zp->z_unlinked || ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zp->z_zfsvfs)); sa_handle_destroy(zp->z_sa_hdl); @@ -540,7 +539,9 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, * Acquire vnode lock before making it available to the world. 
*/ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); +#if __FreeBSD_version >= 1400077 vn_set_state(vp, VSTATE_CONSTRUCTED); +#endif VN_LOCK_AREC(vp); if (vp->v_type != VFIFO) VN_LOCK_ASHARE(vp); @@ -1706,6 +1707,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) } ASSERT3U(version, !=, 0); error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx); + ASSERT0(error); /* * Create zap object used for SA attribute registration diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zio_crypt.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zio_crypt.c index c5e745f7d196..fdbe13dbb5e9 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zio_crypt.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zio_crypt.c @@ -1555,13 +1555,12 @@ zio_crypt_init_uios_normal(boolean_t encrypt, uint8_t *plainbuf, iovec_t *plain_iovecs = NULL, *cipher_iovecs = NULL; void *src, *dst; - cipher_iovecs = kmem_alloc(nr_cipher * sizeof (iovec_t), + cipher_iovecs = kmem_zalloc(nr_cipher * sizeof (iovec_t), KM_SLEEP); if (!cipher_iovecs) { ret = SET_ERROR(ENOMEM); goto error; } - memset(cipher_iovecs, 0, nr_cipher * sizeof (iovec_t)); if (encrypt) { src = plainbuf; diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c b/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c index edd04783b363..963e7a1ec96a 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c @@ -701,7 +701,7 @@ spl_kmem_cache_create(const char *name, size_t size, size_t align, skc->skc_magic = SKC_MAGIC; skc->skc_name_size = strlen(name) + 1; - skc->skc_name = (char *)kmalloc(skc->skc_name_size, lflags); + skc->skc_name = kmalloc(skc->skc_name_size, lflags); if (skc->skc_name == NULL) { kfree(skc); return (NULL); @@ -791,10 +791,8 @@ spl_kmem_cache_create(const char *name, size_t size, size_t align, } else { unsigned long slabflags = 0; - if (size > (SPL_MAX_KMEM_ORDER_NR_PAGES * PAGE_SIZE)) { - rc = EINVAL; + if 
(size > (SPL_MAX_KMEM_ORDER_NR_PAGES * PAGE_SIZE)) goto out; - } #if defined(SLAB_USERCOPY) /* @@ -815,10 +813,8 @@ spl_kmem_cache_create(const char *name, size_t size, size_t align, skc->skc_linux_cache = kmem_cache_create( skc->skc_name, size, align, slabflags, NULL); #endif - if (skc->skc_linux_cache == NULL) { - rc = ENOMEM; + if (skc->skc_linux_cache == NULL) goto out; - } } down_write(&spl_kmem_cache_sem); diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-thread.c b/sys/contrib/openzfs/module/os/linux/spl/spl-thread.c index b863945a1c59..b4ef86a5e4a6 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-thread.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-thread.c @@ -178,12 +178,11 @@ issig(int why) sigorsets(&set, &task->blocked, &set); spin_lock_irq(&task->sighand->siglock); - int ret; #ifdef HAVE_DEQUEUE_SIGNAL_4ARG enum pid_type __type; - if ((ret = dequeue_signal(task, &set, &__info, &__type)) != 0) { + if (dequeue_signal(task, &set, &__info, &__type) != 0) { #else - if ((ret = dequeue_signal(task, &set, &__info)) != 0) { + if (dequeue_signal(task, &set, &__info) != 0) { #endif #ifdef HAVE_SIGNAL_STOP spin_unlock_irq(&task->sighand->siglock); diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c b/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c index b489179f1257..e821fbb4f3a1 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c @@ -50,7 +50,7 @@ typedef struct zone_datasets { typedef struct zone_dataset { struct list_head zd_list; /* zone_dataset linkage */ size_t zd_dsnamelen; /* length of name */ - char zd_dsname[0]; /* name of the member dataset */ + char zd_dsname[]; /* name of the member dataset */ } zone_dataset_t; #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) diff --git a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c index 4f33009f14d4..925ee9d9fe9c 100644 --- 
a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c @@ -71,7 +71,7 @@ typedef struct dio_request { atomic_t dr_ref; /* References */ int dr_error; /* Bio error */ int dr_bio_count; /* Count of bio's */ - struct bio *dr_bio[0]; /* Attached bio's */ + struct bio *dr_bio[]; /* Attached bio's */ } dio_request_t; /* @@ -425,7 +425,7 @@ vdev_disk_dio_get(dio_request_t *dr) atomic_inc(&dr->dr_ref); } -static int +static void vdev_disk_dio_put(dio_request_t *dr) { int rc = atomic_dec_return(&dr->dr_ref); @@ -449,14 +449,11 @@ vdev_disk_dio_put(dio_request_t *dr) zio_delay_interrupt(zio); } } - - return (rc); } BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, error) { dio_request_t *dr = bio->bi_private; - int rc; if (dr->dr_error == 0) { #ifdef HAVE_1ARG_BIO_END_IO_T @@ -470,7 +467,7 @@ BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, error) } /* Drop reference acquired by __vdev_disk_physio */ - rc = vdev_disk_dio_put(dr); + vdev_disk_dio_put(dr); } static inline void @@ -665,7 +662,7 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio, retry: dr = vdev_disk_dio_alloc(bio_count); - if (zio && !(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)) && + if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)) && zio->io_vd->vdev_failfast == B_TRUE) { bio_set_flags_failfast(bdev, &flags, zfs_vdev_failfast_mask & 1, zfs_vdev_failfast_mask & 2, zfs_vdev_failfast_mask & 4); @@ -742,7 +739,7 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio, if (dr->dr_bio_count > 1) blk_finish_plug(&plug); - (void) vdev_disk_dio_put(dr); + vdev_disk_dio_put(dr); return (error); } diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c index 89bfa02af768..db1bb9577197 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c @@ -2233,8 +2233,7 @@ static int 
zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode) { if ((v4_mode & WRITE_MASK) && (zfs_is_readonly(ZTOZSB(zp))) && - (!Z_ISDEV(ZTOI(zp)->i_mode) || - (Z_ISDEV(ZTOI(zp)->i_mode) && (v4_mode & WRITE_MASK_ATTRS)))) { + (!Z_ISDEV(ZTOI(zp)->i_mode) || (v4_mode & WRITE_MASK_ATTRS))) { return (SET_ERROR(EROFS)); } @@ -2582,7 +2581,6 @@ zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr) } if (uid == KUID_TO_SUID(ZTOI(zdp)->i_uid)) { - owner = B_TRUE; if (zdp->z_mode & S_IXUSR) { mutex_exit(&zdp->z_acl_lock); return (0); @@ -2592,7 +2590,6 @@ zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr) } } if (groupmember(KGID_TO_SGID(ZTOI(zdp)->i_gid), cr)) { - groupmbr = B_TRUE; if (zdp->z_mode & S_IXGRP) { mutex_exit(&zdp->z_acl_lock); return (0); @@ -2721,7 +2718,6 @@ zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr, * read_acl/read_attributes */ - error = 0; ASSERT(working_mode != 0); if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES) && diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_debug.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_debug.c index e5a600250659..b090ec684e05 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_debug.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_debug.c @@ -29,8 +29,8 @@ typedef struct zfs_dbgmsg { procfs_list_node_t zdm_node; uint64_t zdm_timestamp; - uint_t zdm_size; - char zdm_msg[1]; /* variable length allocation */ + uint_t zdm_size; + char zdm_msg[]; /* variable length allocation */ } zfs_dbgmsg_t; static procfs_list_t zfs_dbgmsgs; @@ -135,7 +135,7 @@ __set_error(const char *file, const char *func, int line, int err) void __zfs_dbgmsg(char *buf) { - uint_t size = sizeof (zfs_dbgmsg_t) + strlen(buf); + uint_t size = sizeof (zfs_dbgmsg_t) + strlen(buf) + 1; zfs_dbgmsg_t *zdm = kmem_zalloc(size, KM_SLEEP); zdm->zdm_size = size; zdm->zdm_timestamp = gethrestime_sec(); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_dir.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_dir.c 
index 85aa94d8df6a..1fec4ea09317 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_dir.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_dir.c @@ -649,6 +649,8 @@ zfs_rmnode(znode_t *zp) objset_t *os = zfsvfs->z_os; znode_t *xzp = NULL; dmu_tx_t *tx; + znode_hold_t *zh; + uint64_t z_id = zp->z_id; uint64_t acl_obj; uint64_t xattr_obj; uint64_t links; @@ -666,8 +668,9 @@ zfs_rmnode(znode_t *zp) * Not enough space to delete some xattrs. * Leave it in the unlinked set. */ + zh = zfs_znode_hold_enter(zfsvfs, z_id); zfs_znode_dmu_fini(zp); - + zfs_znode_hold_exit(zfsvfs, zh); return; } } @@ -686,7 +689,9 @@ zfs_rmnode(znode_t *zp) * Not enough space or we were interrupted by unmount. * Leave the file in the unlinked set. */ + zh = zfs_znode_hold_enter(zfsvfs, z_id); zfs_znode_dmu_fini(zp); + zfs_znode_hold_exit(zfsvfs, zh); return; } } @@ -726,7 +731,9 @@ zfs_rmnode(znode_t *zp) * which point we'll call zfs_unlinked_drain() to process it). */ dmu_tx_abort(tx); + zh = zfs_znode_hold_enter(zfsvfs, z_id); zfs_znode_dmu_fini(zp); + zfs_znode_hold_exit(zfsvfs, zh); goto out; } @@ -1112,10 +1119,6 @@ zfs_make_xattrdir(znode_t *zp, vattr_t *vap, znode_t **xzpp, cred_t *cr) *xzpp = NULL; - if ((error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr, - kcred->user_ns))) - return (error); - if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL, &acl_ids, kcred->user_ns)) != 0) return (error); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c index 94ae5e91f1c4..47f132a38abe 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c @@ -555,6 +555,7 @@ zfs_create(znode_t *dzp, char *name, vattr_t *vap, int excl, boolean_t fuid_dirtied; boolean_t have_acl = B_FALSE; boolean_t waited = B_FALSE; + boolean_t skip_acl = (flag & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; /* * If we have an ephemeral id, ACL, or XVATTR then @@ -627,7 +628,7 @@ zfs_create(znode_t *dzp, char *name, vattr_t *vap, int excl, * Create a new file object and update the directory * to reference it. */ - if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr, + if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, skip_acl, cr, mnt_ns))) { if (have_acl) zfs_acl_ids_free(&acl_ids); @@ -720,7 +721,6 @@ zfs_create(znode_t *dzp, char *name, vattr_t *vap, int excl, if (have_acl) zfs_acl_ids_free(&acl_ids); - have_acl = B_FALSE; /* * A directory entry already exists for this name. @@ -2531,7 +2531,7 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr, zuserns_t *mnt_ns) dmu_tx_commit(tx); if (attrzp) { if (err2 == 0 && handle_eadir) - err2 = zfs_setattr_dir(attrzp); + err = zfs_setattr_dir(attrzp); zrele(attrzp); } zfs_znode_update_vfs(zp); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c index d673da38463b..1faf25d93cc7 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c @@ -271,7 +271,7 @@ zfs_znode_held(zfsvfs_t *zfsvfs, uint64_t obj) return (held); } -static znode_hold_t * +znode_hold_t * zfs_znode_hold_enter(zfsvfs_t *zfsvfs, uint64_t obj) { znode_hold_t *zh, *zh_new, search; @@ -304,7 +304,7 @@ zfs_znode_hold_enter(zfsvfs_t *zfsvfs, uint64_t obj) return (zh); } -static void +void zfs_znode_hold_exit(zfsvfs_t *zfsvfs, znode_hold_t *zh) { int i = ZFS_OBJ_HASH(zfsvfs, zh->zh_obj); @@ -357,7 +357,7 @@ zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp, void zfs_znode_dmu_fini(znode_t *zp) { - ASSERT(zfs_znode_held(ZTOZSB(zp), zp->z_id) || zp->z_unlinked || + ASSERT(zfs_znode_held(ZTOZSB(zp), zp->z_id) || RW_WRITE_HELD(&ZTOZSB(zp)->z_teardown_inactive_lock)); sa_handle_destroy(zp->z_sa_hdl); @@ -495,13 +495,11 @@ zfs_set_inode_flags(znode_t *zp, struct inode *ip) void zfs_znode_update_vfs(znode_t *zp) 
{ - zfsvfs_t *zfsvfs; struct inode *ip; uint32_t blksize; u_longlong_t i_blocks; ASSERT(zp != NULL); - zfsvfs = ZTOZSB(zp); ip = ZTOI(zp); /* Skip .zfs control nodes which do not exist on disk. */ @@ -1885,6 +1883,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) } ASSERT(version != 0); error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx); + ASSERT(error == 0); /* * Create zap object used for SA attribute registration diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c index 93eae7201506..993447e54683 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c @@ -234,12 +234,17 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, #ifdef HAVE_TMPFILE static int +#ifndef HAVE_TMPFILE_DENTRY +zpl_tmpfile(struct user_namespace *userns, struct inode *dir, + struct file *file, umode_t mode) +#else #ifdef HAVE_TMPFILE_USERNS zpl_tmpfile(struct user_namespace *userns, struct inode *dir, struct dentry *dentry, umode_t mode) #else zpl_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) #endif +#endif { cred_t *cr = CRED(); struct inode *ip; @@ -265,11 +270,21 @@ zpl_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) if (error == 0) { /* d_tmpfile will do drop_nlink, so we should set it first */ set_nlink(ip, 1); +#ifndef HAVE_TMPFILE_DENTRY + d_tmpfile(file, ip); + + error = zpl_xattr_security_init(ip, dir, + &file->f_path.dentry->d_name); +#else d_tmpfile(dentry, ip); error = zpl_xattr_security_init(ip, dir, &dentry->d_name); +#endif if (error == 0) error = zpl_init_acl(ip, dir); +#ifndef HAVE_TMPFILE_DENTRY + error = finish_open_simple(file, error); +#endif /* * don't need to handle error here, file is already in * unlinked set. 
@@ -761,7 +776,11 @@ const struct inode_operations zpl_inode_operations = { #if defined(HAVE_SET_ACL) .set_acl = zpl_set_acl, #endif /* HAVE_SET_ACL */ +#if defined(HAVE_GET_INODE_ACL) + .get_inode_acl = zpl_get_acl, +#else .get_acl = zpl_get_acl, +#endif /* HAVE_GET_INODE_ACL */ #endif /* CONFIG_FS_POSIX_ACL */ }; @@ -801,7 +820,11 @@ const struct inode_operations zpl_dir_inode_operations = { #if defined(HAVE_SET_ACL) .set_acl = zpl_set_acl, #endif /* HAVE_SET_ACL */ +#if defined(HAVE_GET_INODE_ACL) + .get_inode_acl = zpl_get_acl, +#else .get_acl = zpl_get_acl, +#endif /* HAVE_GET_INODE_ACL */ #endif /* CONFIG_FS_POSIX_ACL */ #ifdef HAVE_RENAME2_OPERATIONS_WRAPPER }, @@ -844,6 +867,10 @@ const struct inode_operations zpl_special_inode_operations = { #if defined(HAVE_SET_ACL) .set_acl = zpl_set_acl, #endif /* HAVE_SET_ACL */ +#if defined(HAVE_GET_INODE_ACL) + .get_inode_acl = zpl_get_acl, +#else .get_acl = zpl_get_acl, +#endif /* HAVE_GET_INODE_ACL */ #endif /* CONFIG_FS_POSIX_ACL */ }; diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c index 99d9b3793f29..4156d686732a 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c @@ -499,7 +499,7 @@ zpl_xattr_set_dir(struct inode *ip, const char *name, const void *value, vap->va_gid = crgetgid(cr); error = -zfs_create(dxzp, (char *)name, vap, 0, 0644, &xzp, - cr, 0, NULL, kcred->user_ns); + cr, ATTR_NOACLCHECK, NULL, kcred->user_ns); if (error) goto out; } @@ -1061,11 +1061,18 @@ int #ifdef HAVE_SET_ACL_USERNS zpl_set_acl(struct user_namespace *userns, struct inode *ip, struct posix_acl *acl, int type) +#elif defined(HAVE_SET_ACL_USERNS_DENTRY_ARG2) +zpl_set_acl(struct user_namespace *userns, struct dentry *dentry, + struct posix_acl *acl, int type) #else zpl_set_acl(struct inode *ip, struct posix_acl *acl, int type) #endif /* HAVE_SET_ACL_USERNS */ { +#ifdef 
HAVE_SET_ACL_USERNS_DENTRY_ARG2 + return (zpl_set_acl_impl(d_inode(dentry), acl, type)); +#else return (zpl_set_acl_impl(ip, acl, type)); +#endif /* HAVE_SET_ACL_USERNS_DENTRY_ARG2 */ } #endif /* HAVE_SET_ACL */ @@ -1124,7 +1131,7 @@ zpl_get_acl_impl(struct inode *ip, int type) return (acl); } -#if defined(HAVE_GET_ACL_RCU) +#if defined(HAVE_GET_ACL_RCU) || defined(HAVE_GET_INODE_ACL) struct posix_acl * zpl_get_acl(struct inode *ip, int type, bool rcu) { diff --git a/sys/contrib/openzfs/module/unicode/u8_textprep.c b/sys/contrib/openzfs/module/unicode/u8_textprep.c index 1940ee510d2c..49e22c88cde7 100644 --- a/sys/contrib/openzfs/module/unicode/u8_textprep.c +++ b/sys/contrib/openzfs/module/unicode/u8_textprep.c @@ -23,6 +23,9 @@ * Use is subject to license terms. */ +/* + * Copyright 2022 MNX Cloud, Inc. + */ @@ -213,10 +216,10 @@ static const int8_t u8_number_of_bytes[0x100] = { /* 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F */ I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, -/* 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F */ +/* 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F */ I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, -/* A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF */ +/* A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF */ I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, /* B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF */ @@ -1286,8 +1289,12 @@ do_composition(size_t uv, uchar_t *s, uchar_t *comb_class, uchar_t *start, saved_l = l - disp[last]; while (p < oslast) { - size = u8_number_of_bytes[*p]; - if (size <= 1 || (p + size) > oslast) + int8_t number_of_bytes = u8_number_of_bytes[*p]; + + if (number_of_bytes <= 1) + break; + size = number_of_bytes; + if ((p + size) > oslast) break; saved_p = p; @@ -1378,8 +1385,10 @@ do_composition(size_t uv, uchar_t *s, uchar_t *comb_class, uchar_t *start, */ static size_t collect_a_seq(size_t uv, uchar_t *u8s, uchar_t **source, uchar_t *slast, - boolean_t 
is_it_toupper, boolean_t is_it_tolower, - boolean_t canonical_decomposition, boolean_t compatibility_decomposition, + boolean_t is_it_toupper, + boolean_t is_it_tolower, + boolean_t canonical_decomposition, + boolean_t compatibility_decomposition, boolean_t canonical_composition, int *errnum, u8_normalization_states_t *state) { diff --git a/sys/contrib/openzfs/module/zcommon/zfs_fletcher.c b/sys/contrib/openzfs/module/zcommon/zfs_fletcher.c index 44c8f486f6d8..fa9b8447e983 100644 --- a/sys/contrib/openzfs/module/zcommon/zfs_fletcher.c +++ b/sys/contrib/openzfs/module/zcommon/zfs_fletcher.c @@ -628,7 +628,7 @@ fletcher_4_kstat_data(char *buf, size_t size, void *data) off += snprintf(buf + off, size - off, "%-17s", "fastest"); off += snprintf(buf + off, size - off, "%-15s", fletcher_4_supp_impls[fastest_stat->native]->name); - off += snprintf(buf + off, size - off, "%-15s\n", + (void) snprintf(buf + off, size - off, "%-15s\n", fletcher_4_supp_impls[fastest_stat->byteswap]->name); } else { ptrdiff_t id = curr_stat - fletcher_4_stat_data; @@ -637,7 +637,7 @@ fletcher_4_kstat_data(char *buf, size_t size, void *data) fletcher_4_supp_impls[id]->name); off += snprintf(buf + off, size - off, "%-15llu", (u_longlong_t)curr_stat->native); - off += snprintf(buf + off, size - off, "%-15llu\n", + (void) snprintf(buf + off, size - off, "%-15llu\n", (u_longlong_t)curr_stat->byteswap); } diff --git a/sys/contrib/openzfs/module/zcommon/zpool_prop.c b/sys/contrib/openzfs/module/zcommon/zpool_prop.c index 285b97909631..e99acef5a8fb 100644 --- a/sys/contrib/openzfs/module/zcommon/zpool_prop.c +++ b/sys/contrib/openzfs/module/zcommon/zpool_prop.c @@ -410,6 +410,18 @@ vdev_prop_init(void) sfeatures); /* default numeric properties */ + zprop_register_number(VDEV_PROP_CHECKSUM_N, "checksum_n", UINT64_MAX, + PROP_DEFAULT, ZFS_TYPE_VDEV, "", "CKSUM_N", B_FALSE, + sfeatures); + zprop_register_number(VDEV_PROP_CHECKSUM_T, "checksum_t", UINT64_MAX, + PROP_DEFAULT, ZFS_TYPE_VDEV, "", "CKSUM_T", 
B_FALSE, + sfeatures); + zprop_register_number(VDEV_PROP_IO_N, "io_n", UINT64_MAX, + PROP_DEFAULT, ZFS_TYPE_VDEV, "", "IO_N", B_FALSE, + sfeatures); + zprop_register_number(VDEV_PROP_IO_T, "io_t", UINT64_MAX, + PROP_DEFAULT, ZFS_TYPE_VDEV, "", "IO_T", B_FALSE, + sfeatures); /* default index (boolean) properties */ zprop_register_index(VDEV_PROP_REMOVING, "removing", 0, diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c index f51f427c1bfd..2a52d0d24572 100644 --- a/sys/contrib/openzfs/module/zfs/arc.c +++ b/sys/contrib/openzfs/module/zfs/arc.c @@ -333,6 +333,7 @@ static int arc_state_evict_marker_count; static kmutex_t arc_evict_lock; static boolean_t arc_evict_needed = B_FALSE; +static clock_t arc_last_uncached_flush; /* * Count of bytes evicted since boot. @@ -473,29 +474,36 @@ static uint_t zfs_arc_lotsfree_percent = 10; */ static int zfs_arc_prune_task_threads = 1; -/* The 6 states: */ +/* The 7 states: */ arc_state_t ARC_anon; arc_state_t ARC_mru; arc_state_t ARC_mru_ghost; arc_state_t ARC_mfu; arc_state_t ARC_mfu_ghost; arc_state_t ARC_l2c_only; +arc_state_t ARC_uncached; arc_stats_t arc_stats = { { "hits", KSTAT_DATA_UINT64 }, + { "iohits", KSTAT_DATA_UINT64 }, { "misses", KSTAT_DATA_UINT64 }, { "demand_data_hits", KSTAT_DATA_UINT64 }, + { "demand_data_iohits", KSTAT_DATA_UINT64 }, { "demand_data_misses", KSTAT_DATA_UINT64 }, { "demand_metadata_hits", KSTAT_DATA_UINT64 }, + { "demand_metadata_iohits", KSTAT_DATA_UINT64 }, { "demand_metadata_misses", KSTAT_DATA_UINT64 }, { "prefetch_data_hits", KSTAT_DATA_UINT64 }, + { "prefetch_data_iohits", KSTAT_DATA_UINT64 }, { "prefetch_data_misses", KSTAT_DATA_UINT64 }, { "prefetch_metadata_hits", KSTAT_DATA_UINT64 }, + { "prefetch_metadata_iohits", KSTAT_DATA_UINT64 }, { "prefetch_metadata_misses", KSTAT_DATA_UINT64 }, { "mru_hits", KSTAT_DATA_UINT64 }, { "mru_ghost_hits", KSTAT_DATA_UINT64 }, { "mfu_hits", KSTAT_DATA_UINT64 }, { "mfu_ghost_hits", KSTAT_DATA_UINT64 }, + { 
"uncached_hits", KSTAT_DATA_UINT64 }, { "deleted", KSTAT_DATA_UINT64 }, { "mutex_miss", KSTAT_DATA_UINT64 }, { "access_skip", KSTAT_DATA_UINT64 }, @@ -544,6 +552,9 @@ arc_stats_t arc_stats = { { "mfu_ghost_size", KSTAT_DATA_UINT64 }, { "mfu_ghost_evictable_data", KSTAT_DATA_UINT64 }, { "mfu_ghost_evictable_metadata", KSTAT_DATA_UINT64 }, + { "uncached_size", KSTAT_DATA_UINT64 }, + { "uncached_evictable_data", KSTAT_DATA_UINT64 }, + { "uncached_evictable_metadata", KSTAT_DATA_UINT64 }, { "l2_hits", KSTAT_DATA_UINT64 }, { "l2_misses", KSTAT_DATA_UINT64 }, { "l2_prefetch_asize", KSTAT_DATA_UINT64 }, @@ -601,8 +612,12 @@ arc_stats_t arc_stats = { { "arc_meta_max", KSTAT_DATA_UINT64 }, { "arc_meta_min", KSTAT_DATA_UINT64 }, { "async_upgrade_sync", KSTAT_DATA_UINT64 }, + { "predictive_prefetch", KSTAT_DATA_UINT64 }, { "demand_hit_predictive_prefetch", KSTAT_DATA_UINT64 }, + { "demand_iohit_predictive_prefetch", KSTAT_DATA_UINT64 }, + { "prescient_prefetch", KSTAT_DATA_UINT64 }, { "demand_hit_prescient_prefetch", KSTAT_DATA_UINT64 }, + { "demand_iohit_prescient_prefetch", KSTAT_DATA_UINT64 }, { "arc_need_free", KSTAT_DATA_UINT64 }, { "arc_sys_free", KSTAT_DATA_UINT64 }, { "arc_raw_size", KSTAT_DATA_UINT64 }, @@ -693,6 +708,7 @@ taskq_t *arc_prune_taskq; ((hdr)->b_flags & ARC_FLAG_COMPRESSED_ARC) #define HDR_L2CACHE(hdr) ((hdr)->b_flags & ARC_FLAG_L2CACHE) +#define HDR_UNCACHED(hdr) ((hdr)->b_flags & ARC_FLAG_UNCACHED) #define HDR_L2_READING(hdr) \ (((hdr)->b_flags & ARC_FLAG_IO_IN_PROGRESS) && \ ((hdr)->b_flags & ARC_FLAG_HAS_L2HDR)) @@ -845,6 +861,7 @@ enum arc_hdr_alloc_flags { ARC_HDR_ALLOC_RDATA = 0x1, ARC_HDR_DO_ADAPT = 0x2, ARC_HDR_USE_RESERVE = 0x4, + ARC_HDR_ALLOC_LINEAR = 0x8, }; @@ -857,8 +874,10 @@ static void arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, const void *tag); static void arc_hdr_free_abd(arc_buf_hdr_t *, boolean_t); static void arc_hdr_alloc_abd(arc_buf_hdr_t *, int); -static void arc_access(arc_buf_hdr_t *, kmutex_t *); +static void 
arc_hdr_destroy(arc_buf_hdr_t *); +static void arc_access(arc_buf_hdr_t *, arc_flags_t, boolean_t); static void arc_buf_watch(arc_buf_t *); +static void arc_change_state(arc_state_t *, arc_buf_hdr_t *); static arc_buf_contents_t arc_buf_type(arc_buf_hdr_t *); static uint32_t arc_bufc_to_flags(arc_buf_contents_t); @@ -1137,10 +1156,11 @@ hdr_full_cons(void *vbuf, void *unused, int kmflag) hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; cv_init(&hdr->b_l1hdr.b_cv, NULL, CV_DEFAULT, NULL); zfs_refcount_create(&hdr->b_l1hdr.b_refcnt); +#ifdef ZFS_DEBUG mutex_init(&hdr->b_l1hdr.b_freeze_lock, NULL, MUTEX_DEFAULT, NULL); - list_link_init(&hdr->b_l1hdr.b_arc_node); - list_link_init(&hdr->b_l2hdr.b_l2node); +#endif multilist_link_init(&hdr->b_l1hdr.b_arc_node); + list_link_init(&hdr->b_l2hdr.b_l2node); arc_space_consume(HDR_FULL_SIZE, ARC_SPACE_HDRS); return (0); @@ -1178,7 +1198,6 @@ buf_cons(void *vbuf, void *unused, int kmflag) arc_buf_t *buf = vbuf; memset(buf, 0, sizeof (arc_buf_t)); - mutex_init(&buf->b_evict_lock, NULL, MUTEX_DEFAULT, NULL); arc_space_consume(sizeof (arc_buf_t), ARC_SPACE_HDRS); return (0); @@ -1197,7 +1216,9 @@ hdr_full_dest(void *vbuf, void *unused) ASSERT(HDR_EMPTY(hdr)); cv_destroy(&hdr->b_l1hdr.b_cv); zfs_refcount_destroy(&hdr->b_l1hdr.b_refcnt); +#ifdef ZFS_DEBUG mutex_destroy(&hdr->b_l1hdr.b_freeze_lock); +#endif ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node)); arc_space_return(HDR_FULL_SIZE, ARC_SPACE_HDRS); } @@ -1226,9 +1247,8 @@ static void buf_dest(void *vbuf, void *unused) { (void) unused; - arc_buf_t *buf = vbuf; + (void) vbuf; - mutex_destroy(&buf->b_evict_lock); arc_space_return(sizeof (arc_buf_t), ARC_SPACE_HDRS); } @@ -1396,6 +1416,7 @@ arc_buf_is_shared(arc_buf_t *buf) static inline void arc_cksum_free(arc_buf_hdr_t *hdr) { +#ifdef ZFS_DEBUG ASSERT(HDR_HAS_L1HDR(hdr)); mutex_enter(&hdr->b_l1hdr.b_freeze_lock); @@ -1404,6 +1425,7 @@ arc_cksum_free(arc_buf_hdr_t *hdr) hdr->b_l1hdr.b_freeze_cksum = NULL; } 
mutex_exit(&hdr->b_l1hdr.b_freeze_lock); +#endif } /* @@ -1432,6 +1454,7 @@ arc_hdr_has_uncompressed_buf(arc_buf_hdr_t *hdr) static void arc_cksum_verify(arc_buf_t *buf) { +#ifdef ZFS_DEBUG arc_buf_hdr_t *hdr = buf->b_hdr; zio_cksum_t zc; @@ -1454,6 +1477,7 @@ arc_cksum_verify(arc_buf_t *buf) if (!ZIO_CHECKSUM_EQUAL(*hdr->b_l1hdr.b_freeze_cksum, zc)) panic("buffer modified while frozen!"); mutex_exit(&hdr->b_l1hdr.b_freeze_lock); +#endif } /* @@ -1494,14 +1518,13 @@ arc_cksum_is_equal(arc_buf_hdr_t *hdr, zio_t *zio) static void arc_cksum_compute(arc_buf_t *buf) { - arc_buf_hdr_t *hdr = buf->b_hdr; - if (!(zfs_flags & ZFS_DEBUG_MODIFY)) return; +#ifdef ZFS_DEBUG + arc_buf_hdr_t *hdr = buf->b_hdr; ASSERT(HDR_HAS_L1HDR(hdr)); - - mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock); + mutex_enter(&hdr->b_l1hdr.b_freeze_lock); if (hdr->b_l1hdr.b_freeze_cksum != NULL || ARC_BUF_COMPRESSED(buf)) { mutex_exit(&hdr->b_l1hdr.b_freeze_lock); return; @@ -1514,6 +1537,7 @@ arc_cksum_compute(arc_buf_t *buf) fletcher_2_native(buf->b_data, arc_buf_size(buf), NULL, hdr->b_l1hdr.b_freeze_cksum); mutex_exit(&hdr->b_l1hdr.b_freeze_lock); +#endif arc_buf_watch(buf); } @@ -1699,12 +1723,14 @@ arc_buf_try_copy_decompressed_data(arc_buf_t *buf) } } +#ifdef ZFS_DEBUG /* * There were no decompressed bufs, so there should not be a * checksum on the hdr either. 
*/ if (zfs_flags & ZFS_DEBUG_MODIFY) EQUIV(!copied, hdr->b_l1hdr.b_freeze_cksum == NULL); +#endif return (copied); } @@ -2283,31 +2309,20 @@ arc_evictable_space_decrement(arc_buf_hdr_t *hdr, arc_state_t *state) static void add_reference(arc_buf_hdr_t *hdr, const void *tag) { - arc_state_t *state; + arc_state_t *state = hdr->b_l1hdr.b_state; ASSERT(HDR_HAS_L1HDR(hdr)); if (!HDR_EMPTY(hdr) && !MUTEX_HELD(HDR_LOCK(hdr))) { - ASSERT(hdr->b_l1hdr.b_state == arc_anon); + ASSERT(state == arc_anon); ASSERT(zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); } - state = hdr->b_l1hdr.b_state; - if ((zfs_refcount_add(&hdr->b_l1hdr.b_refcnt, tag) == 1) && - (state != arc_anon)) { + state != arc_anon && state != arc_l2c_only) { /* We don't use the L2-only state list. */ - if (state != arc_l2c_only) { - multilist_remove(&state->arcs_list[arc_buf_type(hdr)], - hdr); - arc_evictable_space_decrement(hdr, state); - } - /* remove the prefetch flag if we get a reference */ - if (HDR_HAS_L2HDR(hdr)) - l2arc_hdr_arcstats_decrement_state(hdr); - arc_hdr_clear_flags(hdr, ARC_FLAG_PREFETCH); - if (HDR_HAS_L2HDR(hdr)) - l2arc_hdr_arcstats_increment_state(hdr); + multilist_remove(&state->arcs_list[arc_buf_type(hdr)], hdr); + arc_evictable_space_decrement(hdr, state); } } @@ -2317,26 +2332,30 @@ add_reference(arc_buf_hdr_t *hdr, const void *tag) * list making it eligible for eviction. */ static int -remove_reference(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, const void *tag) +remove_reference(arc_buf_hdr_t *hdr, const void *tag) { int cnt; arc_state_t *state = hdr->b_l1hdr.b_state; ASSERT(HDR_HAS_L1HDR(hdr)); - ASSERT(state == arc_anon || MUTEX_HELD(hash_lock)); - ASSERT(!GHOST_STATE(state)); + ASSERT(state == arc_anon || MUTEX_HELD(HDR_LOCK(hdr))); + ASSERT(!GHOST_STATE(state)); /* arc_l2c_only counts as a ghost. */ - /* - * arc_l2c_only counts as a ghost state so we don't need to explicitly - * check to prevent usage of the arc_l2c_only list. 
- */ - if (((cnt = zfs_refcount_remove(&hdr->b_l1hdr.b_refcnt, tag)) == 0) && - (state != arc_anon)) { - multilist_insert(&state->arcs_list[arc_buf_type(hdr)], hdr); - ASSERT3U(hdr->b_l1hdr.b_bufcnt, >, 0); - arc_evictable_space_increment(hdr, state); + if ((cnt = zfs_refcount_remove(&hdr->b_l1hdr.b_refcnt, tag)) != 0) + return (cnt); + + if (state == arc_anon) { + arc_hdr_destroy(hdr); + return (0); } - return (cnt); + if (state == arc_uncached && !HDR_PREFETCH(hdr)) { + arc_change_state(arc_anon, hdr); + arc_hdr_destroy(hdr); + return (0); + } + multilist_insert(&state->arcs_list[arc_buf_type(hdr)], hdr); + arc_evictable_space_increment(hdr, state); + return (0); } /* @@ -2394,8 +2413,7 @@ arc_buf_info(arc_buf_t *ab, arc_buf_info_t *abi, int state_index) * for the buffer must be held by the caller. */ static void -arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, - kmutex_t *hash_lock) +arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr) { arc_state_t *old_state; int64_t refcnt; @@ -2416,6 +2434,12 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, bufcnt = hdr->b_l1hdr.b_bufcnt; update_old = (bufcnt > 0 || hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr)); + + IMPLY(GHOST_STATE(old_state), bufcnt == 0); + IMPLY(GHOST_STATE(new_state), bufcnt == 0); + IMPLY(GHOST_STATE(old_state), hdr->b_l1hdr.b_buf == NULL); + IMPLY(GHOST_STATE(new_state), hdr->b_l1hdr.b_buf == NULL); + IMPLY(old_state == arc_anon, bufcnt <= 1); } else { old_state = arc_l2c_only; refcnt = 0; @@ -2423,11 +2447,13 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, update_old = B_FALSE; } update_new = update_old; + if (GHOST_STATE(old_state)) + update_old = B_TRUE; + if (GHOST_STATE(new_state)) + update_new = B_TRUE; - ASSERT(MUTEX_HELD(hash_lock)); + ASSERT(MUTEX_HELD(HDR_LOCK(hdr))); ASSERT3P(new_state, !=, old_state); - ASSERT(!GHOST_STATE(new_state) || bufcnt == 0); - ASSERT(old_state != arc_anon || bufcnt <= 1); /* * If this buffer is evictable, 
transfer it from the @@ -2436,14 +2462,12 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, if (refcnt == 0) { if (old_state != arc_anon && old_state != arc_l2c_only) { ASSERT(HDR_HAS_L1HDR(hdr)); - multilist_remove(&old_state->arcs_list[buftype], hdr); - - if (GHOST_STATE(old_state)) { - ASSERT0(bufcnt); - ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); - update_old = B_TRUE; + /* remove_reference() saves on insert. */ + if (multilist_link_active(&hdr->b_l1hdr.b_arc_node)) { + multilist_remove(&old_state->arcs_list[buftype], + hdr); + arc_evictable_space_decrement(hdr, old_state); } - arc_evictable_space_decrement(hdr, old_state); } if (new_state != arc_anon && new_state != arc_l2c_only) { /* @@ -2454,12 +2478,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, */ ASSERT(HDR_HAS_L1HDR(hdr)); multilist_insert(&new_state->arcs_list[buftype], hdr); - - if (GHOST_STATE(new_state)) { - ASSERT0(bufcnt); - ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); - update_new = B_TRUE; - } arc_evictable_space_increment(hdr, new_state); } } @@ -3291,7 +3309,9 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize, } ASSERT(HDR_EMPTY(hdr)); +#ifdef ZFS_DEBUG ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); +#endif HDR_SET_PSIZE(hdr, psize); HDR_SET_LSIZE(hdr, lsize); hdr->b_spa = spa; @@ -3367,7 +3387,9 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new) } else { ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); ASSERT0(hdr->b_l1hdr.b_bufcnt); +#ifdef ZFS_DEBUG ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); +#endif /* * If we've reached here, We must have been called from @@ -3477,7 +3499,9 @@ arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t need_crypt) nhdr->b_psize = hdr->b_psize; nhdr->b_lsize = hdr->b_lsize; nhdr->b_spa = hdr->b_spa; +#ifdef ZFS_DEBUG nhdr->b_l1hdr.b_freeze_cksum = hdr->b_l1hdr.b_freeze_cksum; +#endif nhdr->b_l1hdr.b_bufcnt = hdr->b_l1hdr.b_bufcnt; nhdr->b_l1hdr.b_byteswap = hdr->b_l1hdr.b_byteswap; nhdr->b_l1hdr.b_state = 
hdr->b_l1hdr.b_state; @@ -3496,11 +3520,8 @@ arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t need_crypt) */ (void) zfs_refcount_add(&nhdr->b_l1hdr.b_refcnt, FTAG); nhdr->b_l1hdr.b_buf = hdr->b_l1hdr.b_buf; - for (buf = nhdr->b_l1hdr.b_buf; buf != NULL; buf = buf->b_next) { - mutex_enter(&buf->b_evict_lock); + for (buf = nhdr->b_l1hdr.b_buf; buf != NULL; buf = buf->b_next) buf->b_hdr = nhdr; - mutex_exit(&buf->b_evict_lock); - } zfs_refcount_transfer(&nhdr->b_l1hdr.b_refcnt, &hdr->b_l1hdr.b_refcnt); (void) zfs_refcount_remove(&nhdr->b_l1hdr.b_refcnt, FTAG); @@ -3520,7 +3541,9 @@ arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t need_crypt) hdr->b_psize = 0; hdr->b_lsize = 0; hdr->b_spa = 0; +#ifdef ZFS_DEBUG hdr->b_l1hdr.b_freeze_cksum = NULL; +#endif hdr->b_l1hdr.b_buf = NULL; hdr->b_l1hdr.b_bufcnt = 0; hdr->b_l1hdr.b_byteswap = 0; @@ -3624,7 +3647,6 @@ arc_alloc_compressed_buf(spa_t *spa, const void *tag, uint64_t psize, VERIFY0(arc_buf_alloc_impl(hdr, spa, NULL, tag, B_FALSE, B_TRUE, B_FALSE, B_FALSE, &buf)); arc_buf_thaw(buf); - ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); /* * To ensure that the hdr has the correct data in it if we call @@ -3672,7 +3694,6 @@ arc_alloc_raw_buf(spa_t *spa, const void *tag, uint64_t dsobj, VERIFY0(arc_buf_alloc_impl(hdr, spa, NULL, tag, B_TRUE, B_TRUE, B_FALSE, B_FALSE, &buf)); arc_buf_thaw(buf); - ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); return (buf); } @@ -3834,6 +3855,9 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr) if (HDR_HAS_L1HDR(hdr)) { ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node)); ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL); +#ifdef ZFS_DEBUG + ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); +#endif if (!HDR_PROTECTED(hdr)) { kmem_cache_free(hdr_full_cache, hdr); @@ -3853,8 +3877,7 @@ arc_buf_destroy(arc_buf_t *buf, const void *tag) if (hdr->b_l1hdr.b_state == arc_anon) { ASSERT3U(hdr->b_l1hdr.b_bufcnt, ==, 1); ASSERT(!HDR_IO_IN_PROGRESS(hdr)); - VERIFY0(remove_reference(hdr, NULL, tag)); - 
arc_hdr_destroy(hdr); + VERIFY0(remove_reference(hdr, tag)); return; } @@ -3867,8 +3890,8 @@ arc_buf_destroy(arc_buf_t *buf, const void *tag) ASSERT3P(hdr->b_l1hdr.b_state, !=, arc_anon); ASSERT3P(buf->b_data, !=, NULL); - (void) remove_reference(hdr, hash_lock, tag); arc_buf_destroy_impl(buf); + (void) remove_reference(hdr, tag); mutex_exit(hash_lock); } @@ -3883,6 +3906,7 @@ arc_buf_destroy(arc_buf_t *buf, const void *tag) * - arc_mru_ghost -> deleted * - arc_mfu_ghost -> arc_l2c_only * - arc_mfu_ghost -> deleted + * - arc_uncached -> deleted * * Return total size of evicted data buffers for eviction progress tracking. * When evicting from ghost states return logical buffer size to make eviction @@ -3894,21 +3918,23 @@ arc_buf_destroy(arc_buf_t *buf, const void *tag) * only the evicted headers size. */ static int64_t -arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, uint64_t *real_evicted) +arc_evict_hdr(arc_buf_hdr_t *hdr, uint64_t *real_evicted) { arc_state_t *evicted_state, *state; int64_t bytes_evicted = 0; uint_t min_lifetime = HDR_PRESCIENT_PREFETCH(hdr) ? arc_min_prescient_prefetch_ms : arc_min_prefetch_ms; - ASSERT(MUTEX_HELD(hash_lock)); + ASSERT(MUTEX_HELD(HDR_LOCK(hdr))); ASSERT(HDR_HAS_L1HDR(hdr)); + ASSERT(!HDR_IO_IN_PROGRESS(hdr)); + ASSERT0(hdr->b_l1hdr.b_bufcnt); + ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); + ASSERT0(zfs_refcount_count(&hdr->b_l1hdr.b_refcnt)); *real_evicted = 0; state = hdr->b_l1hdr.b_state; if (GHOST_STATE(state)) { - ASSERT(!HDR_IO_IN_PROGRESS(hdr)); - ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); /* * l2arc_write_buffers() relies on a header's L1 portion @@ -3934,7 +3960,7 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, uint64_t *real_evicted) * This buffer is cached on the 2nd Level ARC; * don't destroy the header. */ - arc_change_state(arc_l2c_only, hdr, hash_lock); + arc_change_state(arc_l2c_only, hdr); /* * dropping from L1+L2 cached to L2-only, * realloc to remove the L1 header. 
@@ -3943,40 +3969,25 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, uint64_t *real_evicted) hdr_l2only_cache); *real_evicted += HDR_FULL_SIZE - HDR_L2ONLY_SIZE; } else { - arc_change_state(arc_anon, hdr, hash_lock); + arc_change_state(arc_anon, hdr); arc_hdr_destroy(hdr); *real_evicted += HDR_FULL_SIZE; } return (bytes_evicted); } - ASSERT(state == arc_mru || state == arc_mfu); - evicted_state = (state == arc_mru) ? arc_mru_ghost : arc_mfu_ghost; + ASSERT(state == arc_mru || state == arc_mfu || state == arc_uncached); + evicted_state = (state == arc_uncached) ? arc_anon : + ((state == arc_mru) ? arc_mru_ghost : arc_mfu_ghost); /* prefetch buffers have a minimum lifespan */ - if (HDR_IO_IN_PROGRESS(hdr) || - ((hdr->b_flags & (ARC_FLAG_PREFETCH | ARC_FLAG_INDIRECT)) && + if ((hdr->b_flags & (ARC_FLAG_PREFETCH | ARC_FLAG_INDIRECT)) && ddi_get_lbolt() - hdr->b_l1hdr.b_arc_access < - MSEC_TO_TICK(min_lifetime))) { + MSEC_TO_TICK(min_lifetime)) { ARCSTAT_BUMP(arcstat_evict_skip); return (bytes_evicted); } - ASSERT0(zfs_refcount_count(&hdr->b_l1hdr.b_refcnt)); - while (hdr->b_l1hdr.b_buf) { - arc_buf_t *buf = hdr->b_l1hdr.b_buf; - if (!mutex_tryenter(&buf->b_evict_lock)) { - ARCSTAT_BUMP(arcstat_mutex_miss); - break; - } - if (buf->b_data != NULL) { - bytes_evicted += HDR_GET_LSIZE(hdr); - *real_evicted += HDR_GET_LSIZE(hdr); - } - mutex_exit(&buf->b_evict_lock); - arc_buf_destroy_impl(buf); - } - if (HDR_HAS_L2HDR(hdr)) { ARCSTAT_INCR(arcstat_evict_l2_cached, HDR_GET_LSIZE(hdr)); } else { @@ -4004,28 +4015,27 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, uint64_t *real_evicted) } } - if (hdr->b_l1hdr.b_bufcnt == 0) { - arc_cksum_free(hdr); + bytes_evicted += arc_hdr_size(hdr); + *real_evicted += arc_hdr_size(hdr); - bytes_evicted += arc_hdr_size(hdr); - *real_evicted += arc_hdr_size(hdr); + /* + * If this hdr is being evicted and has a compressed buffer then we + * discard it here before we change states. 
This ensures that the + * accounting is updated correctly in arc_free_data_impl(). + */ + if (hdr->b_l1hdr.b_pabd != NULL) + arc_hdr_free_abd(hdr, B_FALSE); - /* - * If this hdr is being evicted and has a compressed - * buffer then we discard it here before we change states. - * This ensures that the accounting is updated correctly - * in arc_free_data_impl(). - */ - if (hdr->b_l1hdr.b_pabd != NULL) - arc_hdr_free_abd(hdr, B_FALSE); + if (HDR_HAS_RABD(hdr)) + arc_hdr_free_abd(hdr, B_TRUE); - if (HDR_HAS_RABD(hdr)) - arc_hdr_free_abd(hdr, B_TRUE); - - arc_change_state(evicted_state, hdr, hash_lock); + arc_change_state(evicted_state, hdr); + DTRACE_PROBE1(arc__evict, arc_buf_hdr_t *, hdr); + if (evicted_state == arc_anon) { + arc_hdr_destroy(hdr); + *real_evicted += HDR_FULL_SIZE; + } else { ASSERT(HDR_IN_HASH_TABLE(hdr)); - arc_hdr_set_flags(hdr, ARC_FLAG_IN_HASH_TABLE); - DTRACE_PROBE1(arc__evict, arc_buf_hdr_t *, hdr); } return (bytes_evicted); @@ -4110,8 +4120,7 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker, if (mutex_tryenter(hash_lock)) { uint64_t revicted; - uint64_t evicted = arc_evict_hdr(hdr, hash_lock, - &revicted); + uint64_t evicted = arc_evict_hdr(hdr, &revicted); mutex_exit(hash_lock); bytes_evicted += evicted; @@ -4774,7 +4783,7 @@ arc_flush(spa_t *spa, boolean_t retry) * no good way to determine if all of a spa's buffers have been * evicted from an arc state. 
*/ - ASSERT(!retry || spa == 0); + ASSERT(!retry || spa == NULL); if (spa != NULL) guid = spa_load_guid(spa); @@ -4790,6 +4799,9 @@ arc_flush(spa_t *spa, boolean_t retry) (void) arc_flush_state(arc_mfu_ghost, guid, ARC_BUFC_DATA, retry); (void) arc_flush_state(arc_mfu_ghost, guid, ARC_BUFC_METADATA, retry); + + (void) arc_flush_state(arc_uncached, guid, ARC_BUFC_DATA, retry); + (void) arc_flush_state(arc_uncached, guid, ARC_BUFC_METADATA, retry); } void @@ -4921,7 +4933,16 @@ arc_evict_cb_check(void *arg, zthr_t *zthr) * which is held before this function is called, and is held by * arc_wait_for_eviction() when it calls zthr_wakeup(). */ - return (arc_evict_needed); + if (arc_evict_needed) + return (B_TRUE); + + /* + * If we have buffers in uncached state, evict them periodically. + */ + return ((zfs_refcount_count(&arc_uncached->arcs_esize[ARC_BUFC_DATA]) + + zfs_refcount_count(&arc_uncached->arcs_esize[ARC_BUFC_METADATA]) && + ddi_get_lbolt() - arc_last_uncached_flush > + MSEC_TO_TICK(arc_min_prefetch_ms / 2))); } /* @@ -4936,8 +4957,14 @@ arc_evict_cb(void *arg, zthr_t *zthr) uint64_t evicted = 0; fstrans_cookie_t cookie = spl_fstrans_mark(); - /* Evict from cache */ - evicted = arc_evict(); + /* Always try to evict from uncached state. */ + arc_last_uncached_flush = ddi_get_lbolt(); + evicted += arc_flush_state(arc_uncached, 0, ARC_BUFC_DATA, B_FALSE); + evicted += arc_flush_state(arc_uncached, 0, ARC_BUFC_METADATA, B_FALSE); + + /* Evict from other states only if told to. 
*/ + if (arc_evict_needed) + evicted += arc_evict(); /* * If evicted is zero, we couldn't evict anything @@ -5216,12 +5243,10 @@ arc_get_data_abd(arc_buf_hdr_t *hdr, uint64_t size, const void *tag, arc_buf_contents_t type = arc_buf_type(hdr); arc_get_data_impl(hdr, size, tag, alloc_flags); - if (type == ARC_BUFC_METADATA) { - return (abd_alloc(size, B_TRUE)); - } else { - ASSERT(type == ARC_BUFC_DATA); - return (abd_alloc(size, B_FALSE)); - } + if (alloc_flags & ARC_HDR_ALLOC_LINEAR) + return (abd_alloc_linear(size, type == ARC_BUFC_METADATA)); + else + return (abd_alloc(size, type == ARC_BUFC_METADATA)); } static void * @@ -5444,150 +5469,151 @@ arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, const void *tag) /* * This routine is called whenever a buffer is accessed. - * NOTE: the hash lock is dropped in this function. */ static void -arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) +arc_access(arc_buf_hdr_t *hdr, arc_flags_t arc_flags, boolean_t hit) { - clock_t now; - - ASSERT(MUTEX_HELD(hash_lock)); + ASSERT(MUTEX_HELD(HDR_LOCK(hdr))); ASSERT(HDR_HAS_L1HDR(hdr)); + /* + * Update buffer prefetch status. 
+ */ + boolean_t was_prefetch = HDR_PREFETCH(hdr); + boolean_t now_prefetch = arc_flags & ARC_FLAG_PREFETCH; + if (was_prefetch != now_prefetch) { + if (was_prefetch) { + ARCSTAT_CONDSTAT(hit, demand_hit, demand_iohit, + HDR_PRESCIENT_PREFETCH(hdr), prescient, predictive, + prefetch); + } + if (HDR_HAS_L2HDR(hdr)) + l2arc_hdr_arcstats_decrement_state(hdr); + if (was_prefetch) { + arc_hdr_clear_flags(hdr, + ARC_FLAG_PREFETCH | ARC_FLAG_PRESCIENT_PREFETCH); + } else { + arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH); + } + if (HDR_HAS_L2HDR(hdr)) + l2arc_hdr_arcstats_increment_state(hdr); + } + if (now_prefetch) { + if (arc_flags & ARC_FLAG_PRESCIENT_PREFETCH) { + arc_hdr_set_flags(hdr, ARC_FLAG_PRESCIENT_PREFETCH); + ARCSTAT_BUMP(arcstat_prescient_prefetch); + } else { + ARCSTAT_BUMP(arcstat_predictive_prefetch); + } + } + if (arc_flags & ARC_FLAG_L2CACHE) + arc_hdr_set_flags(hdr, ARC_FLAG_L2CACHE); + + clock_t now = ddi_get_lbolt(); if (hdr->b_l1hdr.b_state == arc_anon) { + arc_state_t *new_state; /* - * This buffer is not in the cache, and does not - * appear in our "ghost" list. Add the new buffer - * to the MRU state. + * This buffer is not in the cache, and does not appear in + * our "ghost" lists. Add it to the MRU or uncached state. */ - ASSERT0(hdr->b_l1hdr.b_arc_access); - hdr->b_l1hdr.b_arc_access = ddi_get_lbolt(); - DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, hdr); - arc_change_state(arc_mru, hdr, hash_lock); - + hdr->b_l1hdr.b_arc_access = now; + if (HDR_UNCACHED(hdr)) { + new_state = arc_uncached; + DTRACE_PROBE1(new_state__uncached, arc_buf_hdr_t *, + hdr); + } else { + new_state = arc_mru; + DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, hdr); + } + arc_change_state(new_state, hdr); } else if (hdr->b_l1hdr.b_state == arc_mru) { - now = ddi_get_lbolt(); + /* + * This buffer has been accessed once recently and either + * its read is still in progress or it is in the cache. 
+ */ + if (HDR_IO_IN_PROGRESS(hdr)) { + hdr->b_l1hdr.b_arc_access = now; + return; + } + hdr->b_l1hdr.b_mru_hits++; + ARCSTAT_BUMP(arcstat_mru_hits); /* - * If this buffer is here because of a prefetch, then either: - * - clear the flag if this is a "referencing" read - * (any subsequent access will bump this into the MFU state). - * or - * - move the buffer to the head of the list if this is - * another prefetch (to make it less likely to be evicted). + * If the previous access was a prefetch, then it already + * handled possible promotion, so nothing more to do for now. */ - if (HDR_PREFETCH(hdr) || HDR_PRESCIENT_PREFETCH(hdr)) { - if (zfs_refcount_count(&hdr->b_l1hdr.b_refcnt) == 0) { - /* link protected by hash lock */ - ASSERT(multilist_link_active( - &hdr->b_l1hdr.b_arc_node)); - } else { - if (HDR_HAS_L2HDR(hdr)) - l2arc_hdr_arcstats_decrement_state(hdr); - arc_hdr_clear_flags(hdr, - ARC_FLAG_PREFETCH | - ARC_FLAG_PRESCIENT_PREFETCH); - hdr->b_l1hdr.b_mru_hits++; - ARCSTAT_BUMP(arcstat_mru_hits); - if (HDR_HAS_L2HDR(hdr)) - l2arc_hdr_arcstats_increment_state(hdr); - } + if (was_prefetch) { hdr->b_l1hdr.b_arc_access = now; return; } /* - * This buffer has been "accessed" only once so far, - * but it is still in the cache. Move it to the MFU - * state. + * If more than ARC_MINTIME have passed from the previous + * hit, promote the buffer to the MFU state. */ if (ddi_time_after(now, hdr->b_l1hdr.b_arc_access + ARC_MINTIME)) { - /* - * More than 125ms have passed since we - * instantiated this buffer. Move it to the - * most frequently used state. - */ hdr->b_l1hdr.b_arc_access = now; DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr); - arc_change_state(arc_mfu, hdr, hash_lock); + arc_change_state(arc_mfu, hdr); } - hdr->b_l1hdr.b_mru_hits++; - ARCSTAT_BUMP(arcstat_mru_hits); } else if (hdr->b_l1hdr.b_state == arc_mru_ghost) { arc_state_t *new_state; /* - * This buffer has been "accessed" recently, but - * was evicted from the cache. 
Move it to the - * MFU state. + * This buffer has been accessed once recently, but was + * evicted from the cache. Would we have bigger MRU, it + * would be an MRU hit, so handle it the same way, except + * we don't need to check the previous access time. */ - if (HDR_PREFETCH(hdr) || HDR_PRESCIENT_PREFETCH(hdr)) { + hdr->b_l1hdr.b_mru_ghost_hits++; + ARCSTAT_BUMP(arcstat_mru_ghost_hits); + hdr->b_l1hdr.b_arc_access = now; + if (was_prefetch) { new_state = arc_mru; - if (zfs_refcount_count(&hdr->b_l1hdr.b_refcnt) > 0) { - if (HDR_HAS_L2HDR(hdr)) - l2arc_hdr_arcstats_decrement_state(hdr); - arc_hdr_clear_flags(hdr, - ARC_FLAG_PREFETCH | - ARC_FLAG_PRESCIENT_PREFETCH); - if (HDR_HAS_L2HDR(hdr)) - l2arc_hdr_arcstats_increment_state(hdr); - } DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, hdr); } else { new_state = arc_mfu; DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr); } - - hdr->b_l1hdr.b_arc_access = ddi_get_lbolt(); - arc_change_state(new_state, hdr, hash_lock); - - hdr->b_l1hdr.b_mru_ghost_hits++; - ARCSTAT_BUMP(arcstat_mru_ghost_hits); + arc_change_state(new_state, hdr); } else if (hdr->b_l1hdr.b_state == arc_mfu) { /* - * This buffer has been accessed more than once and is - * still in the cache. Keep it in the MFU state. - * - * NOTE: an add_reference() that occurred when we did - * the arc_read() will have kicked this off the list. - * If it was a prefetch, we will explicitly move it to - * the head of the list now. + * This buffer has been accessed more than once and either + * still in the cache or being restored from one of ghosts. */ - - hdr->b_l1hdr.b_mfu_hits++; - ARCSTAT_BUMP(arcstat_mfu_hits); - hdr->b_l1hdr.b_arc_access = ddi_get_lbolt(); - } else if (hdr->b_l1hdr.b_state == arc_mfu_ghost) { - arc_state_t *new_state = arc_mfu; - /* - * This buffer has been accessed more than once but has - * been evicted from the cache. Move it back to the - * MFU state. 
- */ - - if (HDR_PREFETCH(hdr) || HDR_PRESCIENT_PREFETCH(hdr)) { - /* - * This is a prefetch access... - * move this block back to the MRU state. - */ - new_state = arc_mru; + if (!HDR_IO_IN_PROGRESS(hdr)) { + hdr->b_l1hdr.b_mfu_hits++; + ARCSTAT_BUMP(arcstat_mfu_hits); } - - hdr->b_l1hdr.b_arc_access = ddi_get_lbolt(); - DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr); - arc_change_state(new_state, hdr, hash_lock); - + hdr->b_l1hdr.b_arc_access = now; + } else if (hdr->b_l1hdr.b_state == arc_mfu_ghost) { + /* + * This buffer has been accessed more than once recently, but + * has been evicted from the cache. Would we have bigger MFU + * it would stay in cache, so move it back to MFU state. + */ hdr->b_l1hdr.b_mfu_ghost_hits++; ARCSTAT_BUMP(arcstat_mfu_ghost_hits); + hdr->b_l1hdr.b_arc_access = now; + DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr); + arc_change_state(arc_mfu, hdr); + } else if (hdr->b_l1hdr.b_state == arc_uncached) { + /* + * This buffer is uncacheable, but we got a hit. Probably + * a demand read after prefetch. Nothing more to do here. + */ + if (!HDR_IO_IN_PROGRESS(hdr)) + ARCSTAT_BUMP(arcstat_uncached_hits); + hdr->b_l1hdr.b_arc_access = now; } else if (hdr->b_l1hdr.b_state == arc_l2c_only) { /* - * This buffer is on the 2nd Level ARC. + * This buffer is on the 2nd Level ARC and was not accessed + * for a long time, so treat it as new and put into MRU. 
*/ - - hdr->b_l1hdr.b_arc_access = ddi_get_lbolt(); - DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr); - arc_change_state(arc_mfu, hdr, hash_lock); + hdr->b_l1hdr.b_arc_access = now; + DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, hdr); + arc_change_state(arc_mru, hdr); } else { cmn_err(CE_PANIC, "invalid arc state 0x%p", hdr->b_l1hdr.b_state); @@ -5601,7 +5627,6 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) void arc_buf_access(arc_buf_t *buf) { - mutex_enter(&buf->b_evict_lock); arc_buf_hdr_t *hdr = buf->b_hdr; /* @@ -5609,33 +5634,29 @@ arc_buf_access(arc_buf_t *buf) * The header must be checked again under the hash_lock in order * to handle the case where it is concurrently being released. */ - if (hdr->b_l1hdr.b_state == arc_anon || HDR_EMPTY(hdr)) { - mutex_exit(&buf->b_evict_lock); + if (hdr->b_l1hdr.b_state == arc_anon || HDR_EMPTY(hdr)) return; - } kmutex_t *hash_lock = HDR_LOCK(hdr); mutex_enter(hash_lock); if (hdr->b_l1hdr.b_state == arc_anon || HDR_EMPTY(hdr)) { mutex_exit(hash_lock); - mutex_exit(&buf->b_evict_lock); ARCSTAT_BUMP(arcstat_access_skip); return; } - mutex_exit(&buf->b_evict_lock); - ASSERT(hdr->b_l1hdr.b_state == arc_mru || - hdr->b_l1hdr.b_state == arc_mfu); + hdr->b_l1hdr.b_state == arc_mfu || + hdr->b_l1hdr.b_state == arc_uncached); DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr); - arc_access(hdr, hash_lock); + arc_access(hdr, 0, B_TRUE); mutex_exit(hash_lock); ARCSTAT_BUMP(arcstat_hits); - ARCSTAT_CONDSTAT(!HDR_PREFETCH(hdr) && !HDR_PRESCIENT_PREFETCH(hdr), - demand, prefetch, !HDR_ISTYPE_METADATA(hdr), data, metadata, hits); + ARCSTAT_CONDSTAT(B_TRUE /* demand */, demand, prefetch, + !HDR_ISTYPE_METADATA(hdr), data, metadata, hits); } /* a generic arc_read_done_func_t which you can use */ @@ -5695,7 +5716,6 @@ arc_read_done(zio_t *zio) kmutex_t *hash_lock = NULL; arc_callback_t *callback_list; arc_callback_t *acb; - boolean_t freeable = B_FALSE; /* * The hdr was inserted into hash-table and removed from lists @@ -5768,17 
+5788,7 @@ arc_read_done(zio_t *zio) callback_list = hdr->b_l1hdr.b_acb; ASSERT3P(callback_list, !=, NULL); - - if (hash_lock && zio->io_error == 0 && - hdr->b_l1hdr.b_state == arc_anon) { - /* - * Only call arc_access on anonymous buffers. This is because - * if we've issued an I/O for an evicted buffer, we've already - * called arc_access (to prevent any simultaneous readers from - * getting confused). - */ - arc_access(hdr, hash_lock); - } + hdr->b_l1hdr.b_acb = NULL; /* * If a read request has a callback (i.e. acb_done is not NULL), then we @@ -5788,6 +5798,10 @@ arc_read_done(zio_t *zio) */ int callback_cnt = 0; for (acb = callback_list; acb != NULL; acb = acb->acb_next) { + + /* We need the last one to call below in original order. */ + callback_list = acb; + if (!acb->acb_done || acb->acb_nobuf) continue; @@ -5851,23 +5865,14 @@ arc_read_done(zio_t *zio) */ ASSERT(callback_cnt < 2 || hash_lock != NULL); - hdr->b_l1hdr.b_acb = NULL; - arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS); - if (callback_cnt == 0) - ASSERT(hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr)); - - ASSERT(zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt) || - callback_list != NULL); - if (zio->io_error == 0) { arc_hdr_verify(hdr, zio->io_bp); } else { arc_hdr_set_flags(hdr, ARC_FLAG_IO_ERROR); if (hdr->b_l1hdr.b_state != arc_anon) - arc_change_state(arc_anon, hdr, hash_lock); + arc_change_state(arc_anon, hdr); if (HDR_IN_HASH_TABLE(hdr)) buf_hash_remove(hdr); - freeable = zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt); } /* @@ -5877,18 +5882,11 @@ arc_read_done(zio_t *zio) */ cv_broadcast(&hdr->b_l1hdr.b_cv); - if (hash_lock != NULL) { + arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS); + (void) remove_reference(hdr, hdr); + + if (hash_lock != NULL) mutex_exit(hash_lock); - } else { - /* - * This block was freed while we waited for the read to - * complete. It has been removed from the hash table and - * moved to the anonymous state (so that it won't show up - * in the cache). 
- */ - ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon); - freeable = zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt); - } /* execute each callback and free its structure */ while ((acb = callback_list) != NULL) { @@ -5912,12 +5910,18 @@ arc_read_done(zio_t *zio) zio_nowait(acb->acb_zio_dummy); } - callback_list = acb->acb_next; - kmem_free(acb, sizeof (arc_callback_t)); + callback_list = acb->acb_prev; + if (acb->acb_wait) { + mutex_enter(&acb->acb_wait_lock); + acb->acb_wait_error = zio->io_error; + acb->acb_wait = B_FALSE; + cv_signal(&acb->acb_wait_cv); + mutex_exit(&acb->acb_wait_lock); + /* acb will be freed by the waiting thread. */ + } else { + kmem_free(acb, sizeof (arc_callback_t)); + } } - - if (freeable) - arc_hdr_destroy(hdr); } /* @@ -6003,12 +6007,10 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, */ if (hdr != NULL && HDR_HAS_L1HDR(hdr) && (HDR_HAS_RABD(hdr) || (hdr->b_l1hdr.b_pabd != NULL && !encrypted_read))) { + boolean_t is_data = !HDR_ISTYPE_METADATA(hdr); arc_buf_t *buf = NULL; - *arc_flags |= ARC_FLAG_CACHED; if (HDR_IO_IN_PROGRESS(hdr)) { - zio_t *head_zio = hdr->b_l1hdr.b_acb->acb_zio_head; - if (*arc_flags & ARC_FLAG_CACHED_ONLY) { mutex_exit(hash_lock); ARCSTAT_BUMP(arcstat_cached_only_in_progress); @@ -6016,6 +6018,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, goto out; } + zio_t *head_zio = hdr->b_l1hdr.b_acb->acb_zio_head; ASSERT3P(head_zio, !=, NULL); if ((hdr->b_flags & ARC_FLAG_PRIO_ASYNC_READ) && priority == ZIO_PRIORITY_SYNC_READ) { @@ -6029,38 +6032,28 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_buf_hdr_t *, hdr); ARCSTAT_BUMP(arcstat_async_upgrade_sync); } - if (hdr->b_flags & ARC_FLAG_PREDICTIVE_PREFETCH) { - arc_hdr_clear_flags(hdr, - ARC_FLAG_PREDICTIVE_PREFETCH); - } + + DTRACE_PROBE1(arc__iohit, arc_buf_hdr_t *, hdr); + arc_access(hdr, *arc_flags, B_FALSE); /* * If there are multiple threads reading the same block * and that block is not yet in the ARC, then only one * thread will do the 
physical I/O and all other * threads will wait until that I/O completes. - * Synchronous reads use the b_cv whereas nowait reads - * register a callback. Both are signalled/called in - * arc_read_done. + * Synchronous reads use the acb_wait_cv whereas nowait + * reads register a callback. Both are signalled/called + * in arc_read_done. * - * Errors of the physical I/O may need to be propagated - * to the pio. For synchronous reads, we simply restart - * this function and it will reassess. Nowait reads + * Errors of the physical I/O may need to be propagated. + * Synchronous read errors are returned here from + * arc_read_done via acb_wait_error. Nowait reads * attach the acb_zio_dummy zio to pio and * arc_read_done propagates the physical I/O's io_error * to acb_zio_dummy, and thereby to pio. */ - - if (*arc_flags & ARC_FLAG_WAIT) { - cv_wait(&hdr->b_l1hdr.b_cv, hash_lock); - mutex_exit(hash_lock); - goto top; - } - ASSERT(*arc_flags & ARC_FLAG_NOWAIT); - - if (done) { - arc_callback_t *acb = NULL; - + arc_callback_t *acb = NULL; + if (done || pio || *arc_flags & ARC_FLAG_WAIT) { acb = kmem_zalloc(sizeof (arc_callback_t), KM_SLEEP); acb->acb_done = done; @@ -6069,46 +6062,53 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, acb->acb_encrypted = encrypted_read; acb->acb_noauth = noauth_read; acb->acb_nobuf = no_buf; + if (*arc_flags & ARC_FLAG_WAIT) { + acb->acb_wait = B_TRUE; + mutex_init(&acb->acb_wait_lock, NULL, + MUTEX_DEFAULT, NULL); + cv_init(&acb->acb_wait_cv, NULL, + CV_DEFAULT, NULL); + } acb->acb_zb = *zb; - if (pio != NULL) + if (pio != NULL) { acb->acb_zio_dummy = zio_null(pio, spa, NULL, NULL, NULL, zio_flags); - - ASSERT3P(acb->acb_done, !=, NULL); + } acb->acb_zio_head = head_zio; acb->acb_next = hdr->b_l1hdr.b_acb; + if (hdr->b_l1hdr.b_acb) + hdr->b_l1hdr.b_acb->acb_prev = acb; hdr->b_l1hdr.b_acb = acb; } mutex_exit(hash_lock); + + ARCSTAT_BUMP(arcstat_iohits); + ARCSTAT_CONDSTAT(!(*arc_flags & ARC_FLAG_PREFETCH), + demand, prefetch, is_data, 
data, metadata, iohits); + + if (*arc_flags & ARC_FLAG_WAIT) { + mutex_enter(&acb->acb_wait_lock); + while (acb->acb_wait) { + cv_wait(&acb->acb_wait_cv, + &acb->acb_wait_lock); + } + rc = acb->acb_wait_error; + mutex_exit(&acb->acb_wait_lock); + mutex_destroy(&acb->acb_wait_lock); + cv_destroy(&acb->acb_wait_cv); + kmem_free(acb, sizeof (arc_callback_t)); + } goto out; } ASSERT(hdr->b_l1hdr.b_state == arc_mru || - hdr->b_l1hdr.b_state == arc_mfu); + hdr->b_l1hdr.b_state == arc_mfu || + hdr->b_l1hdr.b_state == arc_uncached); + + DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr); + arc_access(hdr, *arc_flags, B_TRUE); if (done && !no_buf) { - if (hdr->b_flags & ARC_FLAG_PREDICTIVE_PREFETCH) { - /* - * This is a demand read which does not have to - * wait for i/o because we did a predictive - * prefetch i/o for it, which has completed. - */ - DTRACE_PROBE1( - arc__demand__hit__predictive__prefetch, - arc_buf_hdr_t *, hdr); - ARCSTAT_BUMP( - arcstat_demand_hit_predictive_prefetch); - arc_hdr_clear_flags(hdr, - ARC_FLAG_PREDICTIVE_PREFETCH); - } - - if (hdr->b_flags & ARC_FLAG_PRESCIENT_PREFETCH) { - ARCSTAT_BUMP( - arcstat_demand_hit_prescient_prefetch); - arc_hdr_clear_flags(hdr, - ARC_FLAG_PRESCIENT_PREFETCH); - } - ASSERT(!embedded_bp || !BP_IS_HOLE(bp)); /* Get a buf with the desired data in it. 
*/ @@ -6130,34 +6130,20 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, } } if (rc != 0) { - (void) remove_reference(hdr, hash_lock, - private); arc_buf_destroy_impl(buf); buf = NULL; + (void) remove_reference(hdr, private); } /* assert any errors weren't due to unloaded keys */ ASSERT((zio_flags & ZIO_FLAG_SPECULATIVE) || rc != EACCES); - } else if (*arc_flags & ARC_FLAG_PREFETCH && - zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt)) { - if (HDR_HAS_L2HDR(hdr)) - l2arc_hdr_arcstats_decrement_state(hdr); - arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH); - if (HDR_HAS_L2HDR(hdr)) - l2arc_hdr_arcstats_increment_state(hdr); } - DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr); - arc_access(hdr, hash_lock); - if (*arc_flags & ARC_FLAG_PRESCIENT_PREFETCH) - arc_hdr_set_flags(hdr, ARC_FLAG_PRESCIENT_PREFETCH); - if (*arc_flags & ARC_FLAG_L2CACHE) - arc_hdr_set_flags(hdr, ARC_FLAG_L2CACHE); mutex_exit(hash_lock); ARCSTAT_BUMP(arcstat_hits); - ARCSTAT_CONDSTAT(!HDR_PREFETCH(hdr), - demand, prefetch, !HDR_ISTYPE_METADATA(hdr), - data, metadata, hits); + ARCSTAT_CONDSTAT(!(*arc_flags & ARC_FLAG_PREFETCH), + demand, prefetch, is_data, data, metadata, hits); + *arc_flags |= ARC_FLAG_CACHED; if (done) done(NULL, zb, bp, buf, private); @@ -6201,7 +6187,6 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_hdr_destroy(hdr); goto top; /* restart the IO request */ } - alloc_flags |= ARC_HDR_DO_ADAPT; } else { /* * This block is in the ghost cache or encrypted data @@ -6221,7 +6206,9 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, ASSERT0(zfs_refcount_count( &hdr->b_l1hdr.b_refcnt)); ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); +#ifdef ZFS_DEBUG ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); +#endif } else if (HDR_IO_IN_PROGRESS(hdr)) { /* * If this header already had an IO in progress @@ -6236,21 +6223,28 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, mutex_exit(hash_lock); goto top; } - - /* - * This is a delicate dance that we play here. 
- * This hdr might be in the ghost list so we access - * it to move it out of the ghost list before we - * initiate the read. If it's a prefetch then - * it won't have a callback so we'll remove the - * reference that arc_buf_alloc_impl() created. We - * do this after we've called arc_access() to - * avoid hitting an assert in remove_reference(). - */ - arc_adapt(arc_hdr_size(hdr), hdr->b_l1hdr.b_state); - arc_access(hdr, hash_lock); + } + if (*arc_flags & ARC_FLAG_UNCACHED) { + arc_hdr_set_flags(hdr, ARC_FLAG_UNCACHED); + if (!encrypted_read) + alloc_flags |= ARC_HDR_ALLOC_LINEAR; } + /* + * Call arc_adapt() explicitly before arc_access() to allow + * its logic to balance MRU/MFU based on the original state. + */ + arc_adapt(arc_hdr_size(hdr), hdr->b_l1hdr.b_state); + /* + * Take additional reference for IO_IN_PROGRESS. It stops + * arc_access() from putting this header without any buffers + * and so other references but obviously nonevictable onto + * the evictable list of MRU or MFU state. 
+ */ + add_reference(hdr, hdr); + if (!embedded_bp) + arc_access(hdr, *arc_flags, B_FALSE); + arc_hdr_set_flags(hdr, ARC_FLAG_IO_IN_PROGRESS); arc_hdr_alloc_abd(hdr, alloc_flags); if (encrypted_read) { ASSERT(HDR_HAS_RABD(hdr)); @@ -6277,24 +6271,10 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, zio_flags |= ZIO_FLAG_RAW_ENCRYPT; } - if (*arc_flags & ARC_FLAG_PREFETCH && - zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt)) { - if (HDR_HAS_L2HDR(hdr)) - l2arc_hdr_arcstats_decrement_state(hdr); - arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH); - if (HDR_HAS_L2HDR(hdr)) - l2arc_hdr_arcstats_increment_state(hdr); - } - if (*arc_flags & ARC_FLAG_PRESCIENT_PREFETCH) - arc_hdr_set_flags(hdr, ARC_FLAG_PRESCIENT_PREFETCH); - if (*arc_flags & ARC_FLAG_L2CACHE) - arc_hdr_set_flags(hdr, ARC_FLAG_L2CACHE); if (BP_IS_AUTHENTICATED(bp)) arc_hdr_set_flags(hdr, ARC_FLAG_NOAUTH); if (BP_GET_LEVEL(bp) > 0) arc_hdr_set_flags(hdr, ARC_FLAG_INDIRECT); - if (*arc_flags & ARC_FLAG_PREDICTIVE_PREFETCH) - arc_hdr_set_flags(hdr, ARC_FLAG_PREDICTIVE_PREFETCH); ASSERT(!GHOST_STATE(hdr->b_l1hdr.b_state)); acb = kmem_zalloc(sizeof (arc_callback_t), KM_SLEEP); @@ -6307,7 +6287,6 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL); hdr->b_l1hdr.b_acb = acb; - arc_hdr_set_flags(hdr, ARC_FLAG_IO_IN_PROGRESS); if (HDR_HAS_L2HDR(hdr) && (vd = hdr->b_l2hdr.b_dev->l2ad_vdev) != NULL) { @@ -6348,7 +6327,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, blkptr_t *, bp, uint64_t, lsize, zbookmark_phys_t *, zb); ARCSTAT_BUMP(arcstat_misses); - ARCSTAT_CONDSTAT(!HDR_PREFETCH(hdr), + ARCSTAT_CONDSTAT(!(*arc_flags & ARC_FLAG_PREFETCH), demand, prefetch, !HDR_ISTYPE_METADATA(hdr), data, metadata, misses); zfs_racct_read(size, 1); @@ -6370,7 +6349,8 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, */ if (HDR_HAS_L2HDR(hdr) && !HDR_L2_WRITING(hdr) && !HDR_L2_EVICTED(hdr) && - !(l2arc_noprefetch && HDR_PREFETCH(hdr))) { + !(l2arc_noprefetch && + (*arc_flags 
& ARC_FLAG_PREFETCH))) { l2arc_read_callback_t *cb; abd_t *abd; uint64_t asize; @@ -6559,10 +6539,8 @@ arc_freed(spa_t *spa, const blkptr_t *bp) /* * We might be trying to free a block that is still doing I/O - * (i.e. prefetch) or has a reference (i.e. a dedup-ed, - * dmu_sync-ed block). If this block is being prefetched, then it - * would still have the ARC_FLAG_IO_IN_PROGRESS flag set on the hdr - * until the I/O completes. A block may also have a reference if it is + * (i.e. prefetch) or has some other reference (i.e. a dedup-ed, + * dmu_sync-ed block). A block may also have a reference if it is * part of a dedup-ed, dmu_synced write. The dmu_sync() function would * have written the new block to its final resting place on disk but * without the dedup flag set. This would have left the hdr in the MRU @@ -6579,9 +6557,9 @@ arc_freed(spa_t *spa, const blkptr_t *bp) * freed. So if we have an I/O in progress, or a reference to * this hdr, then we don't destroy the hdr. */ - if (!HDR_HAS_L1HDR(hdr) || (!HDR_IO_IN_PROGRESS(hdr) && - zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt))) { - arc_change_state(arc_anon, hdr, hash_lock); + if (!HDR_HAS_L1HDR(hdr) || + zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt)) { + arc_change_state(arc_anon, hdr); arc_hdr_destroy(hdr); mutex_exit(hash_lock); } else { @@ -6607,8 +6585,6 @@ arc_release(arc_buf_t *buf, const void *tag) * But we don't know that information at this level. */ - mutex_enter(&buf->b_evict_lock); - ASSERT(HDR_HAS_L1HDR(hdr)); /* @@ -6617,14 +6593,13 @@ arc_release(arc_buf_t *buf, const void *tag) * linked into the hash table. 
*/ if (hdr->b_l1hdr.b_state == arc_anon) { - mutex_exit(&buf->b_evict_lock); ASSERT(!HDR_IO_IN_PROGRESS(hdr)); ASSERT(!HDR_IN_HASH_TABLE(hdr)); ASSERT(!HDR_HAS_L2HDR(hdr)); ASSERT3U(hdr->b_l1hdr.b_bufcnt, ==, 1); ASSERT3S(zfs_refcount_count(&hdr->b_l1hdr.b_refcnt), ==, 1); - ASSERT(!list_link_active(&hdr->b_l1hdr.b_arc_node)); + ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node)); hdr->b_l1hdr.b_arc_access = 0; @@ -6684,7 +6659,7 @@ arc_release(arc_buf_t *buf, const void *tag) VERIFY3U(hdr->b_type, ==, type); ASSERT(hdr->b_l1hdr.b_buf != buf || buf->b_next != NULL); - (void) remove_reference(hdr, hash_lock, tag); + VERIFY3S(remove_reference(hdr, tag), >, 0); if (arc_buf_is_shared(buf) && !ARC_BUF_COMPRESSED(buf)) { ASSERT3P(hdr->b_l1hdr.b_buf, !=, buf); @@ -6767,10 +6742,6 @@ arc_release(arc_buf_t *buf, const void *tag) mutex_exit(hash_lock); - /* - * Allocate a new hdr. The new hdr will contain a b_pabd - * buffer which will be freed in arc_write(). - */ nhdr = arc_hdr_alloc(spa, psize, lsize, protected, compress, hdr->b_complevel, type); ASSERT3P(nhdr->b_l1hdr.b_buf, ==, NULL); @@ -6786,11 +6757,9 @@ arc_release(arc_buf_t *buf, const void *tag) (void) zfs_refcount_add(&nhdr->b_l1hdr.b_refcnt, tag); buf->b_hdr = nhdr; - mutex_exit(&buf->b_evict_lock); (void) zfs_refcount_add_many(&arc_anon->arcs_size, arc_buf_size(buf), buf); } else { - mutex_exit(&buf->b_evict_lock); ASSERT(zfs_refcount_count(&hdr->b_l1hdr.b_refcnt) == 1); /* protected by hash lock, or hdr is on arc_anon */ ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node)); @@ -6799,7 +6768,7 @@ arc_release(arc_buf_t *buf, const void *tag) hdr->b_l1hdr.b_mru_ghost_hits = 0; hdr->b_l1hdr.b_mfu_hits = 0; hdr->b_l1hdr.b_mfu_ghost_hits = 0; - arc_change_state(arc_anon, hdr, hash_lock); + arc_change_state(arc_anon, hdr); hdr->b_l1hdr.b_arc_access = 0; mutex_exit(hash_lock); @@ -6811,25 +6780,15 @@ arc_release(arc_buf_t *buf, const void *tag) int arc_released(arc_buf_t *buf) { - int released; - - 
mutex_enter(&buf->b_evict_lock); - released = (buf->b_data != NULL && + return (buf->b_data != NULL && buf->b_hdr->b_l1hdr.b_state == arc_anon); - mutex_exit(&buf->b_evict_lock); - return (released); } #ifdef ZFS_DEBUG int arc_referenced(arc_buf_t *buf) { - int referenced; - - mutex_enter(&buf->b_evict_lock); - referenced = (zfs_refcount_count(&buf->b_hdr->b_l1hdr.b_refcnt)); - mutex_exit(&buf->b_evict_lock); - return (referenced); + return (zfs_refcount_count(&buf->b_hdr->b_l1hdr.b_refcnt)); } #endif @@ -6873,10 +6832,12 @@ arc_write_ready(zio_t *zio) callback->awcb_ready(zio, buf, callback->awcb_private); - if (HDR_IO_IN_PROGRESS(hdr)) + if (HDR_IO_IN_PROGRESS(hdr)) { ASSERT(zio->io_flags & ZIO_FLAG_REEXECUTED); - - arc_hdr_set_flags(hdr, ARC_FLAG_IO_IN_PROGRESS); + } else { + arc_hdr_set_flags(hdr, ARC_FLAG_IO_IN_PROGRESS); + add_reference(hdr, hdr); /* For IO_IN_PROGRESS. */ + } if (BP_IS_PROTECTED(bp) != !!HDR_PROTECTED(hdr)) hdr = arc_hdr_realloc_crypt(hdr, BP_IS_PROTECTED(bp)); @@ -6955,7 +6916,8 @@ arc_write_ready(zio_t *zio) arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT | ARC_HDR_ALLOC_RDATA | ARC_HDR_USE_RESERVE); abd_copy(hdr->b_crypt_hdr.b_rabd, zio->io_abd, psize); - } else if (!abd_size_alloc_linear(arc_buf_size(buf)) || + } else if (!(HDR_UNCACHED(hdr) || + abd_size_alloc_linear(arc_buf_size(buf))) || !arc_can_share(hdr, buf)) { /* * Ideally, we would always copy the io_abd into b_pabd, but the @@ -7063,7 +7025,7 @@ arc_write_done(zio_t *zio) (void *)hdr, (void *)exists); ASSERT(zfs_refcount_is_zero( &exists->b_l1hdr.b_refcnt)); - arc_change_state(arc_anon, exists, hash_lock); + arc_change_state(arc_anon, exists); arc_hdr_destroy(exists); mutex_exit(hash_lock); exists = buf_hash_insert(hdr, &hash_lock); @@ -7083,15 +7045,16 @@ arc_write_done(zio_t *zio) } } arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS); + VERIFY3S(remove_reference(hdr, hdr), >, 0); /* if it's not anon, we are doing a scrub */ if (exists == NULL && hdr->b_l1hdr.b_state == arc_anon) - 
arc_access(hdr, hash_lock); + arc_access(hdr, 0, B_FALSE); mutex_exit(hash_lock); } else { arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS); + VERIFY3S(remove_reference(hdr, hdr), >, 0); } - ASSERT(!zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); callback->awcb_done(zio, buf, callback->awcb_private); abd_free(zio->io_abd); @@ -7100,7 +7063,7 @@ arc_write_done(zio_t *zio) zio_t * arc_write(zio_t *pio, spa_t *spa, uint64_t txg, - blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, + blkptr_t *bp, arc_buf_t *buf, boolean_t uncached, boolean_t l2arc, const zio_prop_t *zp, arc_write_done_func_t *ready, arc_write_done_func_t *children_ready, arc_write_done_func_t *physdone, arc_write_done_func_t *done, void *private, zio_priority_t priority, @@ -7117,7 +7080,9 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg, ASSERT(!HDR_IO_IN_PROGRESS(hdr)); ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL); ASSERT3U(hdr->b_l1hdr.b_bufcnt, >, 0); - if (l2arc) + if (uncached) + arc_hdr_set_flags(hdr, ARC_FLAG_UNCACHED); + else if (l2arc) arc_hdr_set_flags(hdr, ARC_FLAG_L2CACHE); if (ARC_BUF_ENCRYPTED(buf)) { @@ -7303,22 +7268,32 @@ arc_kstat_update(kstat_t *ksp, int rw) as->arcstat_hits.value.ui64 = wmsum_value(&arc_sums.arcstat_hits); + as->arcstat_iohits.value.ui64 = + wmsum_value(&arc_sums.arcstat_iohits); as->arcstat_misses.value.ui64 = wmsum_value(&arc_sums.arcstat_misses); as->arcstat_demand_data_hits.value.ui64 = wmsum_value(&arc_sums.arcstat_demand_data_hits); + as->arcstat_demand_data_iohits.value.ui64 = + wmsum_value(&arc_sums.arcstat_demand_data_iohits); as->arcstat_demand_data_misses.value.ui64 = wmsum_value(&arc_sums.arcstat_demand_data_misses); as->arcstat_demand_metadata_hits.value.ui64 = wmsum_value(&arc_sums.arcstat_demand_metadata_hits); + as->arcstat_demand_metadata_iohits.value.ui64 = + wmsum_value(&arc_sums.arcstat_demand_metadata_iohits); as->arcstat_demand_metadata_misses.value.ui64 = wmsum_value(&arc_sums.arcstat_demand_metadata_misses); as->arcstat_prefetch_data_hits.value.ui64 
= wmsum_value(&arc_sums.arcstat_prefetch_data_hits); + as->arcstat_prefetch_data_iohits.value.ui64 = + wmsum_value(&arc_sums.arcstat_prefetch_data_iohits); as->arcstat_prefetch_data_misses.value.ui64 = wmsum_value(&arc_sums.arcstat_prefetch_data_misses); as->arcstat_prefetch_metadata_hits.value.ui64 = wmsum_value(&arc_sums.arcstat_prefetch_metadata_hits); + as->arcstat_prefetch_metadata_iohits.value.ui64 = + wmsum_value(&arc_sums.arcstat_prefetch_metadata_iohits); as->arcstat_prefetch_metadata_misses.value.ui64 = wmsum_value(&arc_sums.arcstat_prefetch_metadata_misses); as->arcstat_mru_hits.value.ui64 = @@ -7329,6 +7304,8 @@ arc_kstat_update(kstat_t *ksp, int rw) wmsum_value(&arc_sums.arcstat_mfu_hits); as->arcstat_mfu_ghost_hits.value.ui64 = wmsum_value(&arc_sums.arcstat_mfu_ghost_hits); + as->arcstat_uncached_hits.value.ui64 = + wmsum_value(&arc_sums.arcstat_uncached_hits); as->arcstat_deleted.value.ui64 = wmsum_value(&arc_sums.arcstat_deleted); as->arcstat_mutex_miss.value.ui64 = @@ -7398,6 +7375,10 @@ arc_kstat_update(kstat_t *ksp, int rw) &as->arcstat_mfu_ghost_size, &as->arcstat_mfu_ghost_evictable_data, &as->arcstat_mfu_ghost_evictable_metadata); + arc_kstat_update_state(arc_uncached, + &as->arcstat_uncached_size, + &as->arcstat_uncached_evictable_data, + &as->arcstat_uncached_evictable_metadata); as->arcstat_dnode_size.value.ui64 = aggsum_value(&arc_sums.arcstat_dnode_size); @@ -7501,10 +7482,18 @@ arc_kstat_update(kstat_t *ksp, int rw) aggsum_value(&arc_sums.arcstat_meta_used); as->arcstat_async_upgrade_sync.value.ui64 = wmsum_value(&arc_sums.arcstat_async_upgrade_sync); + as->arcstat_predictive_prefetch.value.ui64 = + wmsum_value(&arc_sums.arcstat_predictive_prefetch); as->arcstat_demand_hit_predictive_prefetch.value.ui64 = wmsum_value(&arc_sums.arcstat_demand_hit_predictive_prefetch); + as->arcstat_demand_iohit_predictive_prefetch.value.ui64 = + wmsum_value(&arc_sums.arcstat_demand_iohit_predictive_prefetch); + as->arcstat_prescient_prefetch.value.ui64 = 
+ wmsum_value(&arc_sums.arcstat_prescient_prefetch); as->arcstat_demand_hit_prescient_prefetch.value.ui64 = wmsum_value(&arc_sums.arcstat_demand_hit_prescient_prefetch); + as->arcstat_demand_iohit_prescient_prefetch.value.ui64 = + wmsum_value(&arc_sums.arcstat_demand_iohit_prescient_prefetch); as->arcstat_raw_size.value.ui64 = wmsum_value(&arc_sums.arcstat_raw_size); as->arcstat_cached_only_in_progress.value.ui64 = @@ -7698,6 +7687,10 @@ arc_state_init(void) arc_state_multilist_index_func, &num_sublists); arc_state_multilist_init(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA], arc_state_multilist_index_func, &num_sublists); + arc_state_multilist_init(&arc_uncached->arcs_list[ARC_BUFC_METADATA], + arc_state_multilist_index_func, &num_sublists); + arc_state_multilist_init(&arc_uncached->arcs_list[ARC_BUFC_DATA], + arc_state_multilist_index_func, &num_sublists); /* * L2 headers should never be on the L2 state list since they don't @@ -7727,6 +7720,8 @@ arc_state_init(void) zfs_refcount_create(&arc_mfu_ghost->arcs_esize[ARC_BUFC_DATA]); zfs_refcount_create(&arc_l2c_only->arcs_esize[ARC_BUFC_METADATA]); zfs_refcount_create(&arc_l2c_only->arcs_esize[ARC_BUFC_DATA]); + zfs_refcount_create(&arc_uncached->arcs_esize[ARC_BUFC_METADATA]); + zfs_refcount_create(&arc_uncached->arcs_esize[ARC_BUFC_DATA]); zfs_refcount_create(&arc_anon->arcs_size); zfs_refcount_create(&arc_mru->arcs_size); @@ -7734,21 +7729,28 @@ arc_state_init(void) zfs_refcount_create(&arc_mfu->arcs_size); zfs_refcount_create(&arc_mfu_ghost->arcs_size); zfs_refcount_create(&arc_l2c_only->arcs_size); + zfs_refcount_create(&arc_uncached->arcs_size); wmsum_init(&arc_sums.arcstat_hits, 0); + wmsum_init(&arc_sums.arcstat_iohits, 0); wmsum_init(&arc_sums.arcstat_misses, 0); wmsum_init(&arc_sums.arcstat_demand_data_hits, 0); + wmsum_init(&arc_sums.arcstat_demand_data_iohits, 0); wmsum_init(&arc_sums.arcstat_demand_data_misses, 0); wmsum_init(&arc_sums.arcstat_demand_metadata_hits, 0); + 
wmsum_init(&arc_sums.arcstat_demand_metadata_iohits, 0); wmsum_init(&arc_sums.arcstat_demand_metadata_misses, 0); wmsum_init(&arc_sums.arcstat_prefetch_data_hits, 0); + wmsum_init(&arc_sums.arcstat_prefetch_data_iohits, 0); wmsum_init(&arc_sums.arcstat_prefetch_data_misses, 0); wmsum_init(&arc_sums.arcstat_prefetch_metadata_hits, 0); + wmsum_init(&arc_sums.arcstat_prefetch_metadata_iohits, 0); wmsum_init(&arc_sums.arcstat_prefetch_metadata_misses, 0); wmsum_init(&arc_sums.arcstat_mru_hits, 0); wmsum_init(&arc_sums.arcstat_mru_ghost_hits, 0); wmsum_init(&arc_sums.arcstat_mfu_hits, 0); wmsum_init(&arc_sums.arcstat_mfu_ghost_hits, 0); + wmsum_init(&arc_sums.arcstat_uncached_hits, 0); wmsum_init(&arc_sums.arcstat_deleted, 0); wmsum_init(&arc_sums.arcstat_mutex_miss, 0); wmsum_init(&arc_sums.arcstat_access_skip, 0); @@ -7817,8 +7819,12 @@ arc_state_init(void) wmsum_init(&arc_sums.arcstat_prune, 0); aggsum_init(&arc_sums.arcstat_meta_used, 0); wmsum_init(&arc_sums.arcstat_async_upgrade_sync, 0); + wmsum_init(&arc_sums.arcstat_predictive_prefetch, 0); wmsum_init(&arc_sums.arcstat_demand_hit_predictive_prefetch, 0); + wmsum_init(&arc_sums.arcstat_demand_iohit_predictive_prefetch, 0); + wmsum_init(&arc_sums.arcstat_prescient_prefetch, 0); wmsum_init(&arc_sums.arcstat_demand_hit_prescient_prefetch, 0); + wmsum_init(&arc_sums.arcstat_demand_iohit_prescient_prefetch, 0); wmsum_init(&arc_sums.arcstat_raw_size, 0); wmsum_init(&arc_sums.arcstat_cached_only_in_progress, 0); wmsum_init(&arc_sums.arcstat_abd_chunk_waste_size, 0); @@ -7829,6 +7835,7 @@ arc_state_init(void) arc_mfu->arcs_state = ARC_STATE_MFU; arc_mfu_ghost->arcs_state = ARC_STATE_MFU_GHOST; arc_l2c_only->arcs_state = ARC_STATE_L2C_ONLY; + arc_uncached->arcs_state = ARC_STATE_UNCACHED; } static void @@ -7846,6 +7853,8 @@ arc_state_fini(void) zfs_refcount_destroy(&arc_mfu_ghost->arcs_esize[ARC_BUFC_DATA]); zfs_refcount_destroy(&arc_l2c_only->arcs_esize[ARC_BUFC_METADATA]); 
zfs_refcount_destroy(&arc_l2c_only->arcs_esize[ARC_BUFC_DATA]); + zfs_refcount_destroy(&arc_uncached->arcs_esize[ARC_BUFC_METADATA]); + zfs_refcount_destroy(&arc_uncached->arcs_esize[ARC_BUFC_DATA]); zfs_refcount_destroy(&arc_anon->arcs_size); zfs_refcount_destroy(&arc_mru->arcs_size); @@ -7853,6 +7862,7 @@ arc_state_fini(void) zfs_refcount_destroy(&arc_mfu->arcs_size); zfs_refcount_destroy(&arc_mfu_ghost->arcs_size); zfs_refcount_destroy(&arc_l2c_only->arcs_size); + zfs_refcount_destroy(&arc_uncached->arcs_size); multilist_destroy(&arc_mru->arcs_list[ARC_BUFC_METADATA]); multilist_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA]); @@ -7864,21 +7874,29 @@ arc_state_fini(void) multilist_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]); multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA]); multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_DATA]); + multilist_destroy(&arc_uncached->arcs_list[ARC_BUFC_METADATA]); + multilist_destroy(&arc_uncached->arcs_list[ARC_BUFC_DATA]); wmsum_fini(&arc_sums.arcstat_hits); + wmsum_fini(&arc_sums.arcstat_iohits); wmsum_fini(&arc_sums.arcstat_misses); wmsum_fini(&arc_sums.arcstat_demand_data_hits); + wmsum_fini(&arc_sums.arcstat_demand_data_iohits); wmsum_fini(&arc_sums.arcstat_demand_data_misses); wmsum_fini(&arc_sums.arcstat_demand_metadata_hits); + wmsum_fini(&arc_sums.arcstat_demand_metadata_iohits); wmsum_fini(&arc_sums.arcstat_demand_metadata_misses); wmsum_fini(&arc_sums.arcstat_prefetch_data_hits); + wmsum_fini(&arc_sums.arcstat_prefetch_data_iohits); wmsum_fini(&arc_sums.arcstat_prefetch_data_misses); wmsum_fini(&arc_sums.arcstat_prefetch_metadata_hits); + wmsum_fini(&arc_sums.arcstat_prefetch_metadata_iohits); wmsum_fini(&arc_sums.arcstat_prefetch_metadata_misses); wmsum_fini(&arc_sums.arcstat_mru_hits); wmsum_fini(&arc_sums.arcstat_mru_ghost_hits); wmsum_fini(&arc_sums.arcstat_mfu_hits); wmsum_fini(&arc_sums.arcstat_mfu_ghost_hits); + wmsum_fini(&arc_sums.arcstat_uncached_hits); 
wmsum_fini(&arc_sums.arcstat_deleted); wmsum_fini(&arc_sums.arcstat_mutex_miss); wmsum_fini(&arc_sums.arcstat_access_skip); @@ -7947,8 +7965,12 @@ arc_state_fini(void) wmsum_fini(&arc_sums.arcstat_prune); aggsum_fini(&arc_sums.arcstat_meta_used); wmsum_fini(&arc_sums.arcstat_async_upgrade_sync); + wmsum_fini(&arc_sums.arcstat_predictive_prefetch); wmsum_fini(&arc_sums.arcstat_demand_hit_predictive_prefetch); + wmsum_fini(&arc_sums.arcstat_demand_iohit_predictive_prefetch); + wmsum_fini(&arc_sums.arcstat_prescient_prefetch); wmsum_fini(&arc_sums.arcstat_demand_hit_prescient_prefetch); + wmsum_fini(&arc_sums.arcstat_demand_iohit_prescient_prefetch); wmsum_fini(&arc_sums.arcstat_raw_size); wmsum_fini(&arc_sums.arcstat_cached_only_in_progress); wmsum_fini(&arc_sums.arcstat_abd_chunk_waste_size); @@ -8059,8 +8081,8 @@ arc_init(void) arc_state_evict_markers = arc_state_alloc_markers(arc_state_evict_marker_count); - arc_evict_zthr = zthr_create("arc_evict", - arc_evict_cb_check, arc_evict_cb, NULL, defclsyspri); + arc_evict_zthr = zthr_create_timer("arc_evict", + arc_evict_cb_check, arc_evict_cb, NULL, SEC2NSEC(1), defclsyspri); arc_reap_zthr = zthr_create_timer("arc_reap", arc_reap_cb_check, arc_reap_cb, NULL, SEC2NSEC(1), minclsyspri); @@ -9259,7 +9281,7 @@ l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all) * arc_hdr_destroy() will call list_remove() * and decrement arcstat_l2_lsize. 
*/ - arc_change_state(arc_anon, hdr, hash_lock); + arc_change_state(arc_anon, hdr); arc_hdr_destroy(hdr); } else { ASSERT(hdr->b_l1hdr.b_state != arc_l2c_only); diff --git a/sys/contrib/openzfs/module/zfs/blake3_zfs.c b/sys/contrib/openzfs/module/zfs/blake3_zfs.c index 7560f30fd4e4..bcc595bca8f2 100644 --- a/sys/contrib/openzfs/module/zfs/blake3_zfs.c +++ b/sys/contrib/openzfs/module/zfs/blake3_zfs.c @@ -47,7 +47,7 @@ void abd_checksum_blake3_native(abd_t *abd, uint64_t size, const void *ctx_template, zio_cksum_t *zcp) { - ASSERT(ctx_template != 0); + ASSERT(ctx_template != NULL); #if defined(_KERNEL) BLAKE3_CTX *ctx = blake3_per_cpu_ctx[CPU_SEQID_UNSTABLE]; @@ -76,7 +76,7 @@ abd_checksum_blake3_byteswap(abd_t *abd, uint64_t size, { zio_cksum_t tmp; - ASSERT(ctx_template != 0); + ASSERT(ctx_template != NULL); abd_checksum_blake3_native(abd, size, ctx_template, &tmp); zcp->zc_word[0] = BSWAP_64(tmp.zc_word[0]); diff --git a/sys/contrib/openzfs/module/zfs/bqueue.c b/sys/contrib/openzfs/module/zfs/bqueue.c index ec5ce4388ec8..a7fa516975de 100644 --- a/sys/contrib/openzfs/module/zfs/bqueue.c +++ b/sys/contrib/openzfs/module/zfs/bqueue.c @@ -27,19 +27,26 @@ obj2node(bqueue_t *q, void *data) /* * Initialize a blocking queue The maximum capacity of the queue is set to - * size. Types that are stored in a bqueue must contain a bqueue_node_t, - * and node_offset must be its offset from the start of the struct. - * fill_fraction is a performance tuning value; when the queue is full, any - * threads attempting to enqueue records will block. They will block until - * they're signaled, which will occur when the queue is at least 1/fill_fraction + * size. Types that are stored in a bqueue must contain a bqueue_node_t, and + * node_offset must be its offset from the start of the struct. fill_fraction + * is a performance tuning value; when the queue is full, any threads + * attempting to enqueue records will block. 
They will block until they're + * signaled, which will occur when the queue is at least 1/fill_fraction * empty. Similar behavior occurs on dequeue; if the queue is empty, threads - * block. They will be signalled when the queue has 1/fill_fraction full, or - * when bqueue_flush is called. As a result, you must call bqueue_flush when - * you enqueue your final record on a thread, in case the dequeueing threads are - * currently blocked and that enqueue does not cause them to be awoken. - * Alternatively, this behavior can be disabled (causing signaling to happen - * immediately) by setting fill_fraction to any value larger than size. - * Return 0 on success, or -1 on failure. + * block. They will be signaled when the queue is 1/fill_fraction full. + * As a result, you must call bqueue_enqueue_flush() when you enqueue your + * final record on a thread, in case the dequeuing threads are currently + * blocked and that enqueue does not cause them to be woken. Alternatively, + * this behavior can be disabled (causing signaling to happen immediately) by + * setting fill_fraction to any value larger than size. Return 0 on success, + * or -1 on failure. + * + * Note: The caller must ensure that for a given bqueue_t, there's only a + * single call to bqueue_enqueue() running at a time (e.g. by calling only + * from a single thread, or with locking around the call). Similarly, the + * caller must ensure that there's only a single call to bqueue_dequeue() + * running at a time. However, the one call to bqueue_enqueue() may be + * invoked concurrently with the one call to bqueue_dequeue().
*/ int bqueue_init(bqueue_t *q, uint_t fill_fraction, size_t size, size_t node_offset) @@ -49,11 +56,17 @@ bqueue_init(bqueue_t *q, uint_t fill_fraction, size_t size, size_t node_offset) } list_create(&q->bq_list, node_offset + sizeof (bqueue_node_t), node_offset + offsetof(bqueue_node_t, bqn_node)); + list_create(&q->bq_dequeuing_list, node_offset + sizeof (bqueue_node_t), + node_offset + offsetof(bqueue_node_t, bqn_node)); + list_create(&q->bq_enqueuing_list, node_offset + sizeof (bqueue_node_t), + node_offset + offsetof(bqueue_node_t, bqn_node)); cv_init(&q->bq_add_cv, NULL, CV_DEFAULT, NULL); cv_init(&q->bq_pop_cv, NULL, CV_DEFAULT, NULL); mutex_init(&q->bq_lock, NULL, MUTEX_DEFAULT, NULL); q->bq_node_offset = node_offset; q->bq_size = 0; + q->bq_dequeuing_size = 0; + q->bq_enqueuing_size = 0; q->bq_maxsize = size; q->bq_fill_fraction = fill_fraction; return (0); @@ -69,9 +82,13 @@ bqueue_destroy(bqueue_t *q) { mutex_enter(&q->bq_lock); ASSERT0(q->bq_size); + ASSERT0(q->bq_dequeuing_size); + ASSERT0(q->bq_enqueuing_size); cv_destroy(&q->bq_add_cv); cv_destroy(&q->bq_pop_cv); list_destroy(&q->bq_list); + list_destroy(&q->bq_dequeuing_list); + list_destroy(&q->bq_enqueuing_list); mutex_exit(&q->bq_lock); mutex_destroy(&q->bq_lock); } @@ -81,23 +98,24 @@ bqueue_enqueue_impl(bqueue_t *q, void *data, size_t item_size, boolean_t flush) { ASSERT3U(item_size, >, 0); ASSERT3U(item_size, <=, q->bq_maxsize); - mutex_enter(&q->bq_lock); + obj2node(q, data)->bqn_size = item_size; - while (q->bq_size && q->bq_size + item_size > q->bq_maxsize) { - /* - * Wake up bqueue_dequeue() thread if already sleeping in order - * to prevent the deadlock condition - */ - cv_signal(&q->bq_pop_cv); - cv_wait_sig(&q->bq_add_cv, &q->bq_lock); - } - q->bq_size += item_size; - list_insert_tail(&q->bq_list, data); - if (flush) + q->bq_enqueuing_size += item_size; + list_insert_tail(&q->bq_enqueuing_list, data); + + if (flush || + q->bq_enqueuing_size >= q->bq_maxsize / q->bq_fill_fraction) { + 
/* Append the enqueuing list to the shared list. */ + mutex_enter(&q->bq_lock); + while (q->bq_size > q->bq_maxsize) { + cv_wait_sig(&q->bq_add_cv, &q->bq_lock); + } + q->bq_size += q->bq_enqueuing_size; + list_move_tail(&q->bq_list, &q->bq_enqueuing_list); + q->bq_enqueuing_size = 0; cv_broadcast(&q->bq_pop_cv); - else if (q->bq_size >= q->bq_maxsize / q->bq_fill_fraction) - cv_signal(&q->bq_pop_cv); - mutex_exit(&q->bq_lock); + mutex_exit(&q->bq_lock); + } } /* @@ -115,8 +133,8 @@ bqueue_enqueue(bqueue_t *q, void *data, size_t item_size) * Enqueue an entry, and then flush the queue. This forces the popping threads * to wake up, even if we're below the fill fraction. We have this in a single * function, rather than having a separate call, because it prevents race - * conditions between the enqueuing thread and the dequeueing thread, where the - * enqueueing thread will wake up the dequeueing thread, that thread will + * conditions between the enqueuing thread and the dequeuing thread, where the + * enqueuing thread will wake up the dequeuing thread, that thread will * destroy the condvar before the enqueuing thread is done. */ void @@ -132,27 +150,26 @@ bqueue_enqueue_flush(bqueue_t *q, void *data, size_t item_size) void * bqueue_dequeue(bqueue_t *q) { - void *ret = NULL; - size_t item_size; - mutex_enter(&q->bq_lock); - while (q->bq_size == 0) { - cv_wait_sig(&q->bq_pop_cv, &q->bq_lock); + void *ret = list_remove_head(&q->bq_dequeuing_list); + if (ret == NULL) { + /* + * Dequeuing list is empty. Wait for there to be something on + * the shared list, then move the entire shared list to the + * dequeuing list.
+ */ + mutex_enter(&q->bq_lock); + while (q->bq_size == 0) { + cv_wait_sig(&q->bq_pop_cv, &q->bq_lock); + } + ASSERT0(q->bq_dequeuing_size); + ASSERT(list_is_empty(&q->bq_dequeuing_list)); + list_move_tail(&q->bq_dequeuing_list, &q->bq_list); + q->bq_dequeuing_size = q->bq_size; + q->bq_size = 0; + cv_broadcast(&q->bq_add_cv); + mutex_exit(&q->bq_lock); + ret = list_remove_head(&q->bq_dequeuing_list); } - ret = list_remove_head(&q->bq_list); - ASSERT3P(ret, !=, NULL); - item_size = obj2node(q, ret)->bqn_size; - q->bq_size -= item_size; - if (q->bq_size <= q->bq_maxsize - (q->bq_maxsize / q->bq_fill_fraction)) - cv_signal(&q->bq_add_cv); - mutex_exit(&q->bq_lock); + q->bq_dequeuing_size -= obj2node(q, ret)->bqn_size; return (ret); } - -/* - * Returns true if the space used is 0. - */ -boolean_t -bqueue_empty(bqueue_t *q) -{ - return (q->bq_size == 0); -} diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c index 52760fb1b57e..efaa13317be0 100644 --- a/sys/contrib/openzfs/module/zfs/dbuf.c +++ b/sys/contrib/openzfs/module/zfs/dbuf.c @@ -339,7 +339,8 @@ dbuf_hash(void *os, uint64_t obj, uint8_t lvl, uint64_t blkid) (dbuf)->db_blkid == (blkid)) dmu_buf_impl_t * -dbuf_find(objset_t *os, uint64_t obj, uint8_t level, uint64_t blkid) +dbuf_find(objset_t *os, uint64_t obj, uint8_t level, uint64_t blkid, + uint64_t *hash_out) { dbuf_hash_table_t *h = &dbuf_hash_table; uint64_t hv; @@ -361,6 +362,8 @@ dbuf_find(objset_t *os, uint64_t obj, uint8_t level, uint64_t blkid) } } mutex_exit(DBUF_HASH_MUTEX(h, idx)); + if (hash_out != NULL) + *hash_out = hv; return (NULL); } @@ -395,13 +398,13 @@ dbuf_hash_insert(dmu_buf_impl_t *db) objset_t *os = db->db_objset; uint64_t obj = db->db.db_object; int level = db->db_level; - uint64_t blkid, hv, idx; + uint64_t blkid, idx; dmu_buf_impl_t *dbf; uint32_t i; blkid = db->db_blkid; - hv = dbuf_hash(os, obj, level, blkid); - idx = hv & h->hash_table_mask; + ASSERT3U(dbuf_hash(os, obj, level, blkid), ==, 
db->db_hash); + idx = db->db_hash & h->hash_table_mask; mutex_enter(DBUF_HASH_MUTEX(h, idx)); for (dbf = h->hash_table[idx], i = 0; dbf != NULL; @@ -475,12 +478,12 @@ static void dbuf_hash_remove(dmu_buf_impl_t *db) { dbuf_hash_table_t *h = &dbuf_hash_table; - uint64_t hv, idx; + uint64_t idx; dmu_buf_impl_t *dbf, **dbp; - hv = dbuf_hash(db->db_objset, db->db.db_object, - db->db_level, db->db_blkid); - idx = hv & h->hash_table_mask; + ASSERT3U(dbuf_hash(db->db_objset, db->db.db_object, db->db_level, + db->db_blkid), ==, db->db_hash); + idx = db->db_hash & h->hash_table_mask; /* * We mustn't hold db_mtx to maintain lock ordering: @@ -1605,7 +1608,9 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags, DTRACE_SET_STATE(db, "read issued"); mutex_exit(&db->db_mtx); - if (dbuf_is_l2cacheable(db)) + if (!DBUF_IS_CACHEABLE(db)) + aflags |= ARC_FLAG_UNCACHED; + else if (dbuf_is_l2cacheable(db)) aflags |= ARC_FLAG_L2CACHE; dbuf_add_ref(db, NULL); @@ -1733,10 +1738,13 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) dn = DB_DNODE(db); prefetch = db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID && - (flags & DB_RF_NOPREFETCH) == 0 && dn != NULL && - DBUF_IS_CACHEABLE(db); + (flags & DB_RF_NOPREFETCH) == 0 && dn != NULL; mutex_enter(&db->db_mtx); + if (flags & DB_RF_PARTIAL_FIRST) + db->db_partial_read = B_TRUE; + else if (!(flags & DB_RF_PARTIAL_MORE)) + db->db_partial_read = B_FALSE; if (db->db_state == DB_CACHED) { /* * Ensure that this block's dnode has been decrypted if @@ -2124,7 +2132,8 @@ dbuf_dirty_lightweight(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx) * Otherwise the buffer contents could be inconsistent between the * dbuf and the lightweight dirty record. 
*/ - ASSERT3P(NULL, ==, dbuf_find(dn->dn_objset, dn->dn_object, 0, blkid)); + ASSERT3P(NULL, ==, dbuf_find(dn->dn_objset, dn->dn_object, 0, blkid, + NULL)); mutex_enter(&dn->dn_mtx); int txgoff = tx->tx_txg & TXG_MASK; @@ -3073,7 +3082,7 @@ dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse, static dmu_buf_impl_t * dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid, - dmu_buf_impl_t *parent, blkptr_t *blkptr) + dmu_buf_impl_t *parent, blkptr_t *blkptr, uint64_t hash) { objset_t *os = dn->dn_objset; dmu_buf_impl_t *db, *odb; @@ -3094,6 +3103,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid, db->db_dnode_handle = dn->dn_handle; db->db_parent = parent; db->db_blkptr = blkptr; + db->db_hash = hash; db->db_user = NULL; db->db_user_immediate_evict = FALSE; @@ -3394,7 +3404,7 @@ dbuf_prefetch_impl(dnode_t *dn, int64_t level, uint64_t blkid, goto no_issue; dmu_buf_impl_t *db = dbuf_find(dn->dn_objset, dn->dn_object, - level, blkid); + level, blkid, NULL); if (db != NULL) { mutex_exit(&db->db_mtx); /* @@ -3458,8 +3468,9 @@ dbuf_prefetch_impl(dnode_t *dn, int64_t level, uint64_t blkid, dpa->dpa_cb = cb; dpa->dpa_arg = arg; - /* flag if L2ARC eligible, l2arc_noprefetch then decides */ - if (dnode_level_is_l2cacheable(&bp, dn, level)) + if (!DNODE_LEVEL_IS_CACHEABLE(dn, level)) + dpa->dpa_aflags |= ARC_FLAG_UNCACHED; + else if (dnode_level_is_l2cacheable(&bp, dn, level)) dpa->dpa_aflags |= ARC_FLAG_L2CACHE; /* @@ -3559,6 +3570,7 @@ dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid, const void *tag, dmu_buf_impl_t **dbp) { dmu_buf_impl_t *db, *parent = NULL; + uint64_t hv; /* If the pool has been created, verify the tx_sync_lock is not held */ spa_t *spa = dn->dn_objset->os_spa; @@ -3574,7 +3586,7 @@ dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid, *dbp = NULL; /* dbuf_find() returns with db_mtx held */ - db = dbuf_find(dn->dn_objset, dn->dn_object, level, blkid); + db = dbuf_find(dn->dn_objset, dn->dn_object, level, blkid, 
&hv); if (db == NULL) { blkptr_t *bp = NULL; @@ -3596,7 +3608,7 @@ dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid, } if (err && err != ENOENT) return (err); - db = dbuf_create(dn, level, blkid, parent, bp); + db = dbuf_create(dn, level, blkid, parent, bp, hv); } if (fail_uncached && db->db_state != DB_CACHED) { @@ -3680,7 +3692,8 @@ dbuf_create_bonus(dnode_t *dn) ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock)); ASSERT(dn->dn_bonus == NULL); - dn->dn_bonus = dbuf_create(dn, 0, DMU_BONUS_BLKID, dn->dn_dbuf, NULL); + dn->dn_bonus = dbuf_create(dn, 0, DMU_BONUS_BLKID, dn->dn_dbuf, NULL, + dbuf_hash(dn->dn_objset, dn->dn_object, 0, DMU_BONUS_BLKID)); } int @@ -3726,7 +3739,7 @@ dbuf_try_add_ref(dmu_buf_t *db_fake, objset_t *os, uint64_t obj, uint64_t blkid, if (blkid == DMU_BONUS_BLKID) found_db = dbuf_find_bonus(os, obj); else - found_db = dbuf_find(os, obj, 0, blkid); + found_db = dbuf_find(os, obj, 0, blkid, NULL); if (found_db != NULL) { if (db == found_db && dbuf_refcount(db) > db->db_dirtycnt) { @@ -3846,59 +3859,38 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, const void *tag, boolean_t evicting) * This dbuf has anonymous data associated with it. */ dbuf_destroy(db); - } else { - boolean_t do_arc_evict = B_FALSE; - blkptr_t bp; - spa_t *spa = dmu_objset_spa(db->db_objset); + } else if (!(DBUF_IS_CACHEABLE(db) || db->db_partial_read) || + db->db_pending_evict) { + dbuf_destroy(db); + } else if (!multilist_link_active(&db->db_cache_link)) { + ASSERT3U(db->db_caching_status, ==, DB_NO_CACHE); - if (!DBUF_IS_CACHEABLE(db) && - db->db_blkptr != NULL && - !BP_IS_HOLE(db->db_blkptr) && - !BP_IS_EMBEDDED(db->db_blkptr)) { - do_arc_evict = B_TRUE; - bp = *db->db_blkptr; + dbuf_cached_state_t dcs = + dbuf_include_in_metadata_cache(db) ? 
+ DB_DBUF_METADATA_CACHE : DB_DBUF_CACHE; + db->db_caching_status = dcs; + + multilist_insert(&dbuf_caches[dcs].cache, db); + uint64_t db_size = db->db.db_size; + size = zfs_refcount_add_many( + &dbuf_caches[dcs].size, db_size, db); + uint8_t db_level = db->db_level; + mutex_exit(&db->db_mtx); + + if (dcs == DB_DBUF_METADATA_CACHE) { + DBUF_STAT_BUMP(metadata_cache_count); + DBUF_STAT_MAX(metadata_cache_size_bytes_max, + size); + } else { + DBUF_STAT_BUMP(cache_count); + DBUF_STAT_MAX(cache_size_bytes_max, size); + DBUF_STAT_BUMP(cache_levels[db_level]); + DBUF_STAT_INCR(cache_levels_bytes[db_level], + db_size); } - if (!DBUF_IS_CACHEABLE(db) || - db->db_pending_evict) { - dbuf_destroy(db); - } else if (!multilist_link_active(&db->db_cache_link)) { - ASSERT3U(db->db_caching_status, ==, - DB_NO_CACHE); - - dbuf_cached_state_t dcs = - dbuf_include_in_metadata_cache(db) ? - DB_DBUF_METADATA_CACHE : DB_DBUF_CACHE; - db->db_caching_status = dcs; - - multilist_insert(&dbuf_caches[dcs].cache, db); - uint64_t db_size = db->db.db_size; - size = zfs_refcount_add_many( - &dbuf_caches[dcs].size, db_size, db); - uint8_t db_level = db->db_level; - mutex_exit(&db->db_mtx); - - if (dcs == DB_DBUF_METADATA_CACHE) { - DBUF_STAT_BUMP(metadata_cache_count); - DBUF_STAT_MAX( - metadata_cache_size_bytes_max, - size); - } else { - DBUF_STAT_BUMP(cache_count); - DBUF_STAT_MAX(cache_size_bytes_max, - size); - DBUF_STAT_BUMP(cache_levels[db_level]); - DBUF_STAT_INCR( - cache_levels_bytes[db_level], - db_size); - } - - if (dcs == DB_DBUF_CACHE && !evicting) - dbuf_evict_notify(size); - } - - if (do_arc_evict) - arc_freed(spa, &bp); + if (dcs == DB_DBUF_CACHE && !evicting) + dbuf_evict_notify(size); } } else { mutex_exit(&db->db_mtx); @@ -5076,8 +5068,8 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx) children_ready_cb = dbuf_write_children_ready; dr->dr_zio = arc_write(pio, os->os_spa, txg, - &dr->dr_bp_copy, data, dbuf_is_l2cacheable(db), - &zp, dbuf_write_ready, + 
&dr->dr_bp_copy, data, !DBUF_IS_CACHEABLE(db), + dbuf_is_l2cacheable(db), &zp, dbuf_write_ready, children_ready_cb, dbuf_write_physdone, dbuf_write_done, db, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); diff --git a/sys/contrib/openzfs/module/zfs/dmu.c b/sys/contrib/openzfs/module/zfs/dmu.c index 45304e7ddf7a..d6a9f813c270 100644 --- a/sys/contrib/openzfs/module/zfs/dmu.c +++ b/sys/contrib/openzfs/module/zfs/dmu.c @@ -549,14 +549,14 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, ZIO_FLAG_CANFAIL); blkid = dbuf_whichblock(dn, 0, offset); if ((flags & DMU_READ_NO_PREFETCH) == 0 && - DNODE_META_IS_CACHEABLE(dn) && length <= zfetch_array_rd_sz) { + length <= zfetch_array_rd_sz) { /* * Prepare the zfetch before initiating the demand reads, so * that if multiple threads block on same indirect block, we * base predictions on the original less racy request order. */ - zs = dmu_zfetch_prepare(&dn->dn_zfetch, blkid, nblks, - read && DNODE_IS_CACHEABLE(dn), B_TRUE); + zs = dmu_zfetch_prepare(&dn->dn_zfetch, blkid, nblks, read, + B_TRUE); } for (i = 0; i < nblks; i++) { dmu_buf_impl_t *db = dbuf_hold(dn, blkid + i, tag); @@ -579,6 +579,14 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, * state will not yet be CACHED. 
*/ if (read) { + if (i == nblks - 1 && blkid + i < dn->dn_maxblkid && + offset + length < db->db.db_offset + + db->db.db_size) { + if (offset <= db->db.db_offset) + dbuf_flags |= DB_RF_PARTIAL_FIRST; + else + dbuf_flags |= DB_RF_PARTIAL_MORE; + } (void) dbuf_read(db, zio, dbuf_flags); if (db->db_state != DB_CACHED) missed = B_TRUE; @@ -1850,8 +1858,8 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd) dsa->dsa_zgd = zgd; dsa->dsa_tx = NULL; - zio_nowait(arc_write(pio, os->os_spa, txg, - zgd->zgd_bp, dr->dt.dl.dr_data, dbuf_is_l2cacheable(db), + zio_nowait(arc_write(pio, os->os_spa, txg, zgd->zgd_bp, + dr->dt.dl.dr_data, !DBUF_IS_CACHEABLE(db), dbuf_is_l2cacheable(db), &zp, dmu_sync_ready, NULL, NULL, dmu_sync_done, dsa, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb)); diff --git a/sys/contrib/openzfs/module/zfs/dmu_objset.c b/sys/contrib/openzfs/module/zfs/dmu_objset.c index c17c829a04d8..2f18075ae658 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_objset.c +++ b/sys/contrib/openzfs/module/zfs/dmu_objset.c @@ -1694,7 +1694,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) } zio = arc_write(pio, os->os_spa, tx->tx_txg, - blkptr_copy, os->os_phys_buf, dmu_os_is_l2cacheable(os), + blkptr_copy, os->os_phys_buf, B_FALSE, dmu_os_is_l2cacheable(os), &zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done, os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); @@ -2408,13 +2408,6 @@ dmu_objset_id_quota_upgrade_cb(objset_t *os) dmu_objset_userobjspace_present(os)) return (SET_ERROR(ENOTSUP)); - if (dmu_objset_userobjused_enabled(os)) - dmu_objset_ds(os)->ds_feature_activation[ - SPA_FEATURE_USEROBJ_ACCOUNTING] = (void *)B_TRUE; - if (dmu_objset_projectquota_enabled(os)) - dmu_objset_ds(os)->ds_feature_activation[ - SPA_FEATURE_PROJECT_QUOTA] = (void *)B_TRUE; - err = dmu_objset_space_upgrade(os); if (err) return (err); diff --git a/sys/contrib/openzfs/module/zfs/dmu_recv.c b/sys/contrib/openzfs/module/zfs/dmu_recv.c index 
339fb149a49f..ddaa4a5c7291 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_recv.c +++ b/sys/contrib/openzfs/module/zfs/dmu_recv.c @@ -31,6 +31,7 @@ * Copyright (c) 2022 Axcient. */ +#include #include #include #include @@ -75,6 +76,12 @@ static int zfs_recv_best_effort_corrective = 0; static const void *const dmu_recv_tag = "dmu_recv_tag"; const char *const recv_clone_name = "%recv"; +typedef enum { + ORNS_NO, + ORNS_YES, + ORNS_MAYBE +} or_need_sync_t; + static int receive_read_payload_and_next_header(dmu_recv_cookie_t *ra, int len, void *buf); @@ -128,6 +135,9 @@ struct receive_writer_arg { uint8_t or_mac[ZIO_DATA_MAC_LEN]; boolean_t or_byteorder; zio_t *heal_pio; + + /* Keep track of DRR_FREEOBJECTS right after DRR_OBJECT_RANGE */ + or_need_sync_t or_need_sync; }; typedef struct dmu_recv_begin_arg { @@ -1246,19 +1256,29 @@ dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin, uint32_t payloadlen = drc->drc_drr_begin->drr_payloadlen; void *payload = NULL; + + /* + * Since OpenZFS 2.0.0, we have enforced a 64MB limit in userspace + * configurable via ZFS_SENDRECV_MAX_NVLIST. We enforce 256MB as a hard + * upper limit. Systems with less than 1GB of RAM will see a lower + * limit from `arc_all_memory() / 4`. 
+ */ + if (payloadlen > (MIN((1U << 28), arc_all_memory() / 4))) + return (E2BIG); + if (payloadlen != 0) - payload = kmem_alloc(payloadlen, KM_SLEEP); + payload = vmem_alloc(payloadlen, KM_SLEEP); err = receive_read_payload_and_next_header(drc, payloadlen, payload); if (err != 0) { - kmem_free(payload, payloadlen); + vmem_free(payload, payloadlen); return (err); } if (payloadlen != 0) { err = nvlist_unpack(payload, payloadlen, &drc->drc_begin_nvl, KM_SLEEP); - kmem_free(payload, payloadlen); + vmem_free(payload, payloadlen); if (err != 0) { kmem_free(drc->drc_next_rrd, sizeof (*drc->drc_next_rrd)); @@ -1500,11 +1520,11 @@ receive_read(dmu_recv_cookie_t *drc, int len, void *buf) (drc->drc_featureflags & DMU_BACKUP_FEATURE_RAW) != 0); while (done < len) { - ssize_t resid; + ssize_t resid = len - done; zfs_file_t *fp = drc->drc_fp; int err = zfs_file_read(fp, (char *)buf + done, len - done, &resid); - if (resid == len - done) { + if (err == 0 && resid == len - done) { /* * Note: ECKSUM or ZFS_ERR_STREAM_TRUNCATED indicates * that the receive was interrupted and can @@ -1903,10 +1923,22 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, /* object was freed and we are about to allocate a new one */ object_to_hold = DMU_NEW_OBJECT; } else { + /* + * If the only record in this range so far was DRR_FREEOBJECTS + * with at least one actually freed object, it's possible that + * the block will now be converted to a hole. We need to wait + * for the txg to sync to prevent races. 
+ */ + if (rwa->or_need_sync == ORNS_YES) + txg_wait_synced(dmu_objset_pool(rwa->os), 0); + /* object is free and we are about to allocate a new one */ object_to_hold = DMU_NEW_OBJECT; } + /* Only relevant for the first object in the range */ + rwa->or_need_sync = ORNS_NO; + /* * If this is a multi-slot dnode there is a chance that this * object will expand into a slot that is already used by @@ -2100,6 +2132,9 @@ receive_freeobjects(struct receive_writer_arg *rwa, if (err != 0) return (err); + + if (rwa->or_need_sync == ORNS_MAYBE) + rwa->or_need_sync = ORNS_YES; } if (next_err != ESRCH) return (next_err); @@ -2593,6 +2628,8 @@ receive_object_range(struct receive_writer_arg *rwa, memcpy(rwa->or_mac, drror->drr_mac, ZIO_DATA_MAC_LEN); rwa->or_byteorder = byteorder; + rwa->or_need_sync = ORNS_MAYBE; + return (0); } diff --git a/sys/contrib/openzfs/module/zfs/dmu_send.c b/sys/contrib/openzfs/module/zfs/dmu_send.c index fbf19d5c3372..7f8de23f0e29 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_send.c +++ b/sys/contrib/openzfs/module/zfs/dmu_send.c @@ -584,7 +584,13 @@ dump_write_embedded(dmu_send_cookie_t *dscp, uint64_t object, uint64_t offset, decode_embedded_bp_compressed(bp, buf); - if (dump_record(dscp, buf, P2ROUNDUP(drrw->drr_psize, 8)) != 0) + uint32_t psize = drrw->drr_psize; + uint32_t rsize = P2ROUNDUP(psize, 8); + + if (psize != rsize) + memset(buf + psize, 0, rsize - psize); + + if (dump_record(dscp, buf, rsize) != 0) return (SET_ERROR(EINTR)); return (0); } @@ -1712,8 +1718,10 @@ enqueue_range(struct send_reader_thread_arg *srta, bqueue_t *q, dnode_t *dn, struct send_range *range = range_alloc(range_type, dn->dn_object, blkid, blkid + count, B_FALSE); - if (blkid == DMU_SPILL_BLKID) + if (blkid == DMU_SPILL_BLKID) { + ASSERT3P(bp, !=, NULL); ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_SA); + } switch (range_type) { case HOLE: @@ -1834,8 +1842,7 @@ send_reader_thread(void *arg) continue; } uint64_t file_max = - (dn->dn_maxblkid < range->end_blkid ? 
- dn->dn_maxblkid : range->end_blkid); + MIN(dn->dn_maxblkid, range->end_blkid); /* * The object exists, so we need to try to find the * blkptr for each block in the range we're processing. diff --git a/sys/contrib/openzfs/module/zfs/dmu_traverse.c b/sys/contrib/openzfs/module/zfs/dmu_traverse.c index 377634c72bba..244b9b4cbcbc 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_traverse.c +++ b/sys/contrib/openzfs/module/zfs/dmu_traverse.c @@ -185,7 +185,8 @@ static boolean_t traverse_prefetch_metadata(traverse_data_t *td, const blkptr_t *bp, const zbookmark_phys_t *zb) { - arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH; + arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH | + ARC_FLAG_PRESCIENT_PREFETCH; int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE; if (!(td->td_flags & TRAVERSE_PREFETCH_METADATA)) diff --git a/sys/contrib/openzfs/module/zfs/dmu_tx.c b/sys/contrib/openzfs/module/zfs/dmu_tx.c index 28f64369d8dd..815e27a6c7f7 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_tx.c +++ b/sys/contrib/openzfs/module/zfs/dmu_tx.c @@ -214,7 +214,12 @@ dmu_tx_check_ioerr(zio_t *zio, dnode_t *dn, int level, uint64_t blkid) rw_exit(&dn->dn_struct_rwlock); if (db == NULL) return (SET_ERROR(EIO)); - err = dbuf_read(db, zio, DB_RF_CANFAIL | DB_RF_NOPREFETCH); + /* + * PARTIAL_FIRST allows caching for uncacheable blocks. It will + * be cleared after dmu_buf_will_dirty() call dbuf_read() again. + */ + err = dbuf_read(db, zio, DB_RF_CANFAIL | DB_RF_NOPREFETCH | + (level == 0 ? 
DB_RF_PARTIAL_FIRST : 0)); dbuf_rele(db, FTAG); return (err); } @@ -460,6 +465,7 @@ dmu_tx_hold_zap_impl(dmu_tx_hold_t *txh, const char *name) dmu_tx_t *tx = txh->txh_tx; dnode_t *dn = txh->txh_dnode; int err; + extern int zap_micro_max_size; ASSERT(tx->tx_txg == 0); @@ -475,7 +481,7 @@ dmu_tx_hold_zap_impl(dmu_tx_hold_t *txh, const char *name) * - 2 grown ptrtbl blocks */ (void) zfs_refcount_add_many(&txh->txh_space_towrite, - MZAP_MAX_BLKSZ, FTAG); + zap_micro_max_size, FTAG); if (dn == NULL) return; diff --git a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c index 1d63d7de65a1..76b8b5608a53 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c +++ b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c @@ -517,13 +517,11 @@ dmu_zfetch_run(zstream_t *zs, boolean_t missed, boolean_t have_lock) issued = 0; for (int64_t blk = pf_start; blk < pf_end; blk++) { issued += dbuf_prefetch_impl(zf->zf_dnode, 0, blk, - ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH, - dmu_zfetch_done, zs); + ZIO_PRIORITY_ASYNC_READ, 0, dmu_zfetch_done, zs); } for (int64_t iblk = ipf_start; iblk < ipf_end; iblk++) { issued += dbuf_prefetch_impl(zf->zf_dnode, 1, iblk, - ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH, - dmu_zfetch_done, zs); + ZIO_PRIORITY_ASYNC_READ, 0, dmu_zfetch_done, zs); } if (!have_lock) diff --git a/sys/contrib/openzfs/module/zfs/dnode.c b/sys/contrib/openzfs/module/zfs/dnode.c index 2a2c66f9ff9c..0fc49018cdeb 100644 --- a/sys/contrib/openzfs/module/zfs/dnode.c +++ b/sys/contrib/openzfs/module/zfs/dnode.c @@ -2696,3 +2696,8 @@ EXPORT_SYMBOL(dnode_free_range); EXPORT_SYMBOL(dnode_evict_dbufs); EXPORT_SYMBOL(dnode_evict_bonus); #endif + +ZFS_MODULE_PARAM(zfs, zfs_, default_bs, INT, ZMOD_RW, + "Default dnode block shift"); +ZFS_MODULE_PARAM(zfs, zfs_, default_ibs, INT, ZMOD_RW, + "Default dnode indirect block shift"); diff --git a/sys/contrib/openzfs/module/zfs/dnode_sync.c b/sys/contrib/openzfs/module/zfs/dnode_sync.c index 
5eabfb833ef0..8e39af83bb0a 100644 --- a/sys/contrib/openzfs/module/zfs/dnode_sync.c +++ b/sys/contrib/openzfs/module/zfs/dnode_sync.c @@ -70,8 +70,8 @@ dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx) dmu_buf_impl_t *children[DN_MAX_NBLKPTR]; ASSERT3U(nblkptr, <=, DN_MAX_NBLKPTR); for (i = 0; i < nblkptr; i++) { - children[i] = - dbuf_find(dn->dn_objset, dn->dn_object, old_toplvl, i); + children[i] = dbuf_find(dn->dn_objset, dn->dn_object, + old_toplvl, i, NULL); } /* transfer dnode's block pointers to new indirect block */ @@ -175,19 +175,21 @@ free_blocks(dnode_t *dn, blkptr_t *bp, int num, dmu_tx_t *tx) static void free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx) { - int off, num; - int i, err, epbs; + uint64_t off, num, i, j; + unsigned int epbs; + int err; uint64_t txg = tx->tx_txg; dnode_t *dn; DB_DNODE_ENTER(db); dn = DB_DNODE(db); epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT; - off = start - (db->db_blkid * 1<db_blkid << epbs); num = end - start + 1; - ASSERT3U(off, >=, 0); - ASSERT3U(num, >=, 0); + ASSERT3U(dn->dn_phys->dn_indblkshift, >=, SPA_BLKPTRSHIFT); + ASSERT3U(end + 1, >=, start); + ASSERT3U(start, >=, (db->db_blkid << epbs)); ASSERT3U(db->db_level, >, 0); ASSERT3U(db->db.db_size, ==, 1 << dn->dn_phys->dn_indblkshift); ASSERT3U(off+num, <=, db->db.db_size >> SPA_BLKPTRSHIFT); @@ -197,7 +199,6 @@ free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx) uint64_t *buf; dmu_buf_impl_t *child; dbuf_dirty_record_t *dr; - int j; ASSERT(db->db_level == 1); @@ -217,8 +218,11 @@ free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx) for (j = 0; j < child->db.db_size >> 3; j++) { if (buf[j] != 0) { panic("freed data not zero: " - "child=%p i=%d off=%d num=%d\n", - (void *)child, i, off, num); + "child=%p i=%llu off=%llu " + "num=%llu\n", + (void *)child, (u_longlong_t)i, + (u_longlong_t)off, + (u_longlong_t)num); } } } @@ -234,8 +238,11 @@ free_verify(dmu_buf_impl_t *db, uint64_t 
start, uint64_t end, dmu_tx_t *tx) for (j = 0; j < child->db.db_size >> 3; j++) { if (buf[j] != 0) { panic("freed data not zero: " - "child=%p i=%d off=%d num=%d\n", - (void *)child, i, off, num); + "child=%p i=%llu off=%llu " + "num=%llu\n", + (void *)child, (u_longlong_t)i, + (u_longlong_t)off, + (u_longlong_t)num); } } } diff --git a/sys/contrib/openzfs/module/zfs/dsl_dataset.c b/sys/contrib/openzfs/module/zfs/dsl_dataset.c index 4da4effca607..57a58f88cec5 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_dataset.c +++ b/sys/contrib/openzfs/module/zfs/dsl_dataset.c @@ -1698,7 +1698,6 @@ dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname, dsl_dataset_phys_t *dsphys; uint64_t dsobj, crtxg; objset_t *mos = dp->dp_meta_objset; - static zil_header_t zero_zil __maybe_unused; objset_t *os __maybe_unused; ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); @@ -1762,16 +1761,20 @@ dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname, /* * We are not allowed to dirty a filesystem when done receiving - * a snapshot. In this case the flag SPA_FEATURE_LARGE_BLOCKS will - * not be set and a subsequent encrypted raw send will fail. Hence - * activate this feature if needed here. + * a snapshot. In this case some flags such as SPA_FEATURE_LARGE_BLOCKS + * will not be set and a subsequent encrypted raw send will fail. Hence + * activate this feature if needed here. This needs to happen only in + * syncing context. 
*/ - for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { - if (zfeature_active(f, ds->ds_feature_activation[f]) && - !(zfeature_active(f, ds->ds_feature[f]))) { - dsl_dataset_activate_feature(dsobj, f, - ds->ds_feature_activation[f], tx); - ds->ds_feature[f] = ds->ds_feature_activation[f]; + if (dmu_tx_is_syncing(tx)) { + for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { + if (zfeature_active(f, ds->ds_feature_activation[f]) && + !(zfeature_active(f, ds->ds_feature[f]))) { + dsl_dataset_activate_feature(dsobj, f, + ds->ds_feature_activation[f], tx); + ds->ds_feature[f] = + ds->ds_feature_activation[f]; + } } } diff --git a/sys/contrib/openzfs/module/zfs/dsl_dir.c b/sys/contrib/openzfs/module/zfs/dsl_dir.c index c1afaa6aaf82..18142cef9ff4 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_dir.c +++ b/sys/contrib/openzfs/module/zfs/dsl_dir.c @@ -1186,10 +1186,9 @@ dsl_dir_space_towrite(dsl_dir_t *dd) ASSERT(MUTEX_HELD(&dd->dd_lock)); - for (int i = 0; i < TXG_SIZE; i++) { + for (int i = 0; i < TXG_SIZE; i++) space += dd->dd_space_towrite[i & TXG_MASK]; - ASSERT3U(dd->dd_space_towrite[i & TXG_MASK], >=, 0); - } + return (space); } diff --git a/sys/contrib/openzfs/module/zfs/dsl_prop.c b/sys/contrib/openzfs/module/zfs/dsl_prop.c index d1c0059092b1..a1e73f97e38a 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_prop.c +++ b/sys/contrib/openzfs/module/zfs/dsl_prop.c @@ -123,7 +123,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname, /* Check for a iuv value. 
*/ err = zap_lookup(mos, dsl_dir_phys(dd)->dd_props_zapobj, iuvstr, intsz, numints, buf); - if (dsl_prop_known_index(zfs_name_to_prop(propname), + if (err == 0 && dsl_prop_known_index(prop, *(uint64_t *)buf) != 1) err = ENOENT; if (err != ENOENT) { diff --git a/sys/contrib/openzfs/module/zfs/dsl_scan.c b/sys/contrib/openzfs/module/zfs/dsl_scan.c index 03c2aa313af0..f9e437f0c947 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_scan.c +++ b/sys/contrib/openzfs/module/zfs/dsl_scan.c @@ -281,7 +281,7 @@ typedef struct scan_io { * event of an error. This array must go at the end of the * struct to allow this for the variable number of elements. */ - dva_t sio_dva[0]; + dva_t sio_dva[]; } scan_io_t; #define SIO_SET_OFFSET(sio, x) DVA_SET_OFFSET(&(sio)->sio_dva[0], x) @@ -944,13 +944,13 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) if (dsl_scan_restarting(scn, tx)) spa_history_log_internal(spa, "scan aborted, restarting", tx, - "errors=%llu", (u_longlong_t)spa_get_errlog_size(spa)); + "errors=%llu", (u_longlong_t)spa_approx_errlog_size(spa)); else if (!complete) spa_history_log_internal(spa, "scan cancelled", tx, - "errors=%llu", (u_longlong_t)spa_get_errlog_size(spa)); + "errors=%llu", (u_longlong_t)spa_approx_errlog_size(spa)); else spa_history_log_internal(spa, "scan done", tx, - "errors=%llu", (u_longlong_t)spa_get_errlog_size(spa)); + "errors=%llu", (u_longlong_t)spa_approx_errlog_size(spa)); if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { spa->spa_scrub_active = B_FALSE; @@ -1013,7 +1013,7 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) vdev_clear_resilver_deferred(spa->spa_root_vdev, tx)) { spa_history_log_internal(spa, "starting deferred resilver", tx, "errors=%llu", - (u_longlong_t)spa_get_errlog_size(spa)); + (u_longlong_t)spa_approx_errlog_size(spa)); spa_async_request(spa, SPA_ASYNC_RESILVER); } diff --git a/sys/contrib/openzfs/module/zfs/fm.c b/sys/contrib/openzfs/module/zfs/fm.c index 3f05d759770b..52ea6262a29f 100644 --- 
a/sys/contrib/openzfs/module/zfs/fm.c +++ b/sys/contrib/openzfs/module/zfs/fm.c @@ -380,8 +380,7 @@ zfs_zevent_wait(zfs_zevent_t *ze) break; } - error = cv_wait_sig(&zevent_cv, &zevent_lock); - if (signal_pending(current)) { + if (cv_wait_sig(&zevent_cv, &zevent_lock) == 0) { error = SET_ERROR(EINTR); break; } else if (!list_is_empty(&zevent_list)) { diff --git a/sys/contrib/openzfs/module/zfs/metaslab.c b/sys/contrib/openzfs/module/zfs/metaslab.c index c624833bc981..24d52a74933f 100644 --- a/sys/contrib/openzfs/module/zfs/metaslab.c +++ b/sys/contrib/openzfs/module/zfs/metaslab.c @@ -1223,7 +1223,7 @@ metaslab_group_fragmentation(metaslab_group_t *mg) */ static boolean_t metaslab_group_allocatable(metaslab_group_t *mg, metaslab_group_t *rotor, - uint64_t psize, int allocator, int d) + int flags, uint64_t psize, int allocator, int d) { spa_t *spa = mg->mg_vd->vdev_spa; metaslab_class_t *mc = mg->mg_class; @@ -1267,6 +1267,15 @@ metaslab_group_allocatable(metaslab_group_t *mg, metaslab_group_t *rotor, if (mg->mg_no_free_space) return (B_FALSE); + /* + * Some allocations (e.g., those coming from device removal + * where the * allocations are not even counted in the + * metaslab * allocation queues) are allowed to bypass + * the throttle. + */ + if (flags & METASLAB_DONT_THROTTLE) + return (B_TRUE); + /* * Relax allocation throttling for ditto blocks. 
Due to * random imbalances in allocation it tends to push copies @@ -5188,7 +5197,7 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize, */ if (allocatable && !GANG_ALLOCATION(flags) && !try_hard) { allocatable = metaslab_group_allocatable(mg, rotor, - psize, allocator, d); + flags, psize, allocator, d); } if (!allocatable) { diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c index fe7051db2737..67b3a03a951a 100644 --- a/sys/contrib/openzfs/module/zfs/spa.c +++ b/sys/contrib/openzfs/module/zfs/spa.c @@ -5543,7 +5543,7 @@ spa_get_stats(const char *name, nvlist_t **config, fnvlist_add_uint64(*config, ZPOOL_CONFIG_ERRCOUNT, - spa_get_errlog_size(spa)); + spa_approx_errlog_size(spa)); if (spa_suspended(spa)) { fnvlist_add_uint64(*config, diff --git a/sys/contrib/openzfs/module/zfs/spa_config.c b/sys/contrib/openzfs/module/zfs/spa_config.c index 5165c370403b..bf4d1e210b21 100644 --- a/sys/contrib/openzfs/module/zfs/spa_config.c +++ b/sys/contrib/openzfs/module/zfs/spa_config.c @@ -356,6 +356,8 @@ spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent, vdev_post_kobj_evt(target->spa_root_vdev); for (int i = 0; i < target->spa_l2cache.sav_count; i++) vdev_post_kobj_evt(target->spa_l2cache.sav_vdevs[i]); + for (int i = 0; i < target->spa_spares.sav_count; i++) + vdev_post_kobj_evt(target->spa_spares.sav_vdevs[i]); } } diff --git a/sys/contrib/openzfs/module/zfs/spa_errlog.c b/sys/contrib/openzfs/module/zfs/spa_errlog.c index 30e1249dd3b0..c6d97eed2892 100644 --- a/sys/contrib/openzfs/module/zfs/spa_errlog.c +++ b/sys/contrib/openzfs/module/zfs/spa_errlog.c @@ -160,10 +160,8 @@ get_head_and_birth_txg(spa_t *spa, zbookmark_err_phys_t *zep, uint64_t ds_obj, dsl_dataset_t *ds; objset_t *os; - dsl_pool_config_enter(dp, FTAG); int error = dsl_dataset_hold_obj(dp, ds_obj, FTAG, &ds); if (error != 0) { - dsl_pool_config_exit(dp, FTAG); return (error); } ASSERT(head_dataset_id); @@ -172,7 +170,6 @@ 
get_head_and_birth_txg(spa_t *spa, zbookmark_err_phys_t *zep, uint64_t ds_obj, error = dmu_objset_from_ds(ds, &os); if (error != 0) { dsl_dataset_rele(ds, FTAG); - dsl_pool_config_exit(dp, FTAG); return (error); } @@ -189,7 +186,6 @@ get_head_and_birth_txg(spa_t *spa, zbookmark_err_phys_t *zep, uint64_t ds_obj, ZFS_KEYSTATUS_UNAVAILABLE) { zep->zb_birth = 0; dsl_dataset_rele(ds, FTAG); - dsl_pool_config_exit(dp, FTAG); return (0); } @@ -199,7 +195,6 @@ get_head_and_birth_txg(spa_t *spa, zbookmark_err_phys_t *zep, uint64_t ds_obj, error = dnode_hold(os, zep->zb_object, FTAG, &dn); if (error != 0) { dsl_dataset_rele(ds, FTAG); - dsl_pool_config_exit(dp, FTAG); return (error); } @@ -225,7 +220,6 @@ get_head_and_birth_txg(spa_t *spa, zbookmark_err_phys_t *zep, uint64_t ds_obj, rw_exit(&dn->dn_struct_rwlock); dnode_rele(dn, FTAG); dsl_dataset_rele(ds, FTAG); - dsl_pool_config_exit(dp, FTAG); return (error); } @@ -303,17 +297,31 @@ find_birth_txg(dsl_dataset_t *ds, zbookmark_err_phys_t *zep, } /* - * This function serves a double role. If only_count is true, it returns - * (in *count) how many times an error block belonging to this filesystem is - * referenced by snapshots or clones. If only_count is false, each time the - * error block is referenced by a snapshot or clone, it fills the userspace - * array at uaddr with the bookmarks of the error blocks. The array is filled - * from the back and *count is modified to be the number of unused entries at - * the beginning of the array. + * Copy the bookmark to the end of the user-space buffer which starts at + * uaddr and has *count unused entries, and decrement *count by 1. 
+ */ +static int +copyout_entry(const zbookmark_phys_t *zb, void *uaddr, uint64_t *count) +{ + if (*count == 0) + return (SET_ERROR(ENOMEM)); + + *count -= 1; + if (copyout(zb, (char *)uaddr + (*count) * sizeof (zbookmark_phys_t), + sizeof (zbookmark_phys_t)) != 0) + return (SET_ERROR(EFAULT)); + return (0); +} + +/* + * Each time the error block is referenced by a snapshot or clone, add a + * zbookmark_phys_t entry to the userspace array at uaddr. The array is + * filled from the back and the in-out parameter *count is modified to be the + * number of unused entries at the beginning of the array. */ static int check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep, - uint64_t *count, void *uaddr, boolean_t only_count) + void *uaddr, uint64_t *count) { dsl_dataset_t *ds; dsl_pool_t *dp = spa->spa_dsl_pool; @@ -343,18 +351,12 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep, } if (zep->zb_birth == latest_txg) { /* Block neither free nor rewritten. 
*/ - if (!only_count) { - zbookmark_phys_t zb; - zep_to_zb(head_ds, zep, &zb); - if (copyout(&zb, (char *)uaddr + (*count - 1) - * sizeof (zbookmark_phys_t), - sizeof (zbookmark_phys_t)) != 0) { - dsl_dataset_rele(ds, FTAG); - return (SET_ERROR(EFAULT)); - } - (*count)--; - } else { - (*count)++; + zbookmark_phys_t zb; + zep_to_zb(head_ds, zep, &zb); + error = copyout_entry(&zb, uaddr, count); + if (error != 0) { + dsl_dataset_rele(ds, FTAG); + return (error); } check_snapshot = B_FALSE; } else { @@ -407,19 +409,12 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep, snap_obj_array[aff_snap_count] = snap_obj; aff_snap_count++; - if (!only_count) { - zbookmark_phys_t zb; - zep_to_zb(snap_obj, zep, &zb); - if (copyout(&zb, (char *)uaddr + (*count - 1) * - sizeof (zbookmark_phys_t), - sizeof (zbookmark_phys_t)) != 0) { - dsl_dataset_rele(ds, FTAG); - error = SET_ERROR(EFAULT); - goto out; - } - (*count)--; - } else { - (*count)++; + zbookmark_phys_t zb; + zep_to_zb(snap_obj, zep, &zb); + error = copyout_entry(&zb, uaddr, count); + if (error != 0) { + dsl_dataset_rele(ds, FTAG); + goto out; } /* @@ -433,8 +428,7 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep, zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { error = check_filesystem(spa, - za.za_first_integer, zep, - count, uaddr, only_count); + za.za_first_integer, zep, uaddr, count); if (error != 0) { zap_cursor_fini(&zc); @@ -477,11 +471,8 @@ find_top_affected_fs(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep, static int process_error_block(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep, - uint64_t *count, void *uaddr, boolean_t only_count) + void *uaddr, uint64_t *count) { - dsl_pool_t *dp = spa->spa_dsl_pool; - uint64_t top_affected_fs; - /* * If the zb_birth is 0 it means we failed to retrieve the birth txg * of the block pointer. 
This happens when an encrypted filesystem is @@ -489,95 +480,24 @@ process_error_block(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep, * check_filesystem(), instead do the accounting here. */ if (zep->zb_birth == 0) { - if (!only_count) { - zbookmark_phys_t zb; - zep_to_zb(head_ds, zep, &zb); - if (copyout(&zb, (char *)uaddr + (*count - 1) - * sizeof (zbookmark_phys_t), - sizeof (zbookmark_phys_t)) != 0) { - return (SET_ERROR(EFAULT)); - } - (*count)--; - } else { - (*count)++; + zbookmark_phys_t zb; + zep_to_zb(head_ds, zep, &zb); + int error = copyout_entry(&zb, uaddr, count); + if (error != 0) { + return (error); } return (0); } - dsl_pool_config_enter(dp, FTAG); + uint64_t top_affected_fs; int error = find_top_affected_fs(spa, head_ds, zep, &top_affected_fs); - if (error == 0) - error = check_filesystem(spa, top_affected_fs, zep, count, - uaddr, only_count); + if (error == 0) { + error = check_filesystem(spa, top_affected_fs, zep, + uaddr, count); + } - dsl_pool_config_exit(dp, FTAG); return (error); } - -static uint64_t -get_errlog_size(spa_t *spa, uint64_t spa_err_obj) -{ - if (spa_err_obj == 0) - return (0); - uint64_t total = 0; - - zap_cursor_t zc; - zap_attribute_t za; - for (zap_cursor_init(&zc, spa->spa_meta_objset, spa_err_obj); - zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { - - zap_cursor_t head_ds_cursor; - zap_attribute_t head_ds_attr; - zbookmark_err_phys_t head_ds_block; - - uint64_t head_ds; - name_to_object(za.za_name, &head_ds); - - for (zap_cursor_init(&head_ds_cursor, spa->spa_meta_objset, - za.za_first_integer); zap_cursor_retrieve(&head_ds_cursor, - &head_ds_attr) == 0; zap_cursor_advance(&head_ds_cursor)) { - - name_to_errphys(head_ds_attr.za_name, &head_ds_block); - (void) process_error_block(spa, head_ds, &head_ds_block, - &total, NULL, B_TRUE); - } - zap_cursor_fini(&head_ds_cursor); - } - zap_cursor_fini(&zc); - return (total); -} - -static uint64_t -get_errlist_size(spa_t *spa, avl_tree_t *tree) -{ - if 
(avl_numnodes(tree) == 0) - return (0); - uint64_t total = 0; - - spa_error_entry_t *se; - for (se = avl_first(tree); se != NULL; se = AVL_NEXT(tree, se)) { - zbookmark_err_phys_t zep; - zep.zb_object = se->se_bookmark.zb_object; - zep.zb_level = se->se_bookmark.zb_level; - zep.zb_blkid = se->se_bookmark.zb_blkid; - zep.zb_birth = 0; - - /* - * If we cannot find out the head dataset and birth txg of - * the present error block, we opt not to error out. In the - * next pool sync this information will be retrieved by - * sync_error_list() and written to the on-disk error log. - */ - uint64_t head_ds_obj; - int error = get_head_and_birth_txg(spa, &zep, - se->se_bookmark.zb_objset, &head_ds_obj); - - if (!error) - (void) process_error_block(spa, head_ds_obj, &zep, - &total, NULL, B_TRUE); - } - return (total); -} #endif /* @@ -677,13 +597,33 @@ spa_remove_error(spa_t *spa, zbookmark_phys_t *zb) spa_add_healed_error(spa, spa->spa_errlog_scrub, zb); } +static uint64_t +approx_errlog_size_impl(spa_t *spa, uint64_t spa_err_obj) +{ + if (spa_err_obj == 0) + return (0); + uint64_t total = 0; + + zap_cursor_t zc; + zap_attribute_t za; + for (zap_cursor_init(&zc, spa->spa_meta_objset, spa_err_obj); + zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { + uint64_t count; + if (zap_count(spa->spa_meta_objset, za.za_first_integer, + &count) == 0) + total += count; + } + zap_cursor_fini(&zc); + return (total); +} + /* - * Return the number of errors currently in the error log. This is actually the - * sum of both the last log and the current log, since we don't know the union - * of these logs until we reach userland. + * Return the approximate number of errors currently in the error log. This + * will be nonzero if there are some errors, but otherwise it may be more + * or less than the number of entries returned by spa_get_errlog(). 
*/ uint64_t -spa_get_errlog_size(spa_t *spa) +spa_approx_errlog_size(spa_t *spa) { uint64_t total = 0; @@ -701,23 +641,16 @@ spa_get_errlog_size(spa_t *spa) total += count; mutex_exit(&spa->spa_errlog_lock); - mutex_enter(&spa->spa_errlist_lock); - total += avl_numnodes(&spa->spa_errlist_last); - total += avl_numnodes(&spa->spa_errlist_scrub); - mutex_exit(&spa->spa_errlist_lock); } else { -#ifdef _KERNEL mutex_enter(&spa->spa_errlog_lock); - total += get_errlog_size(spa, spa->spa_errlog_last); - total += get_errlog_size(spa, spa->spa_errlog_scrub); + total += approx_errlog_size_impl(spa, spa->spa_errlog_last); + total += approx_errlog_size_impl(spa, spa->spa_errlog_scrub); mutex_exit(&spa->spa_errlog_lock); - - mutex_enter(&spa->spa_errlist_lock); - total += get_errlist_size(spa, &spa->spa_errlist_last); - total += get_errlist_size(spa, &spa->spa_errlist_scrub); - mutex_exit(&spa->spa_errlist_lock); -#endif } + mutex_enter(&spa->spa_errlist_lock); + total += avl_numnodes(&spa->spa_errlist_last); + total += avl_numnodes(&spa->spa_errlist_scrub); + mutex_exit(&spa->spa_errlist_lock); return (total); } @@ -860,8 +793,7 @@ spa_upgrade_errlog(spa_t *spa, dmu_tx_t *tx) #ifdef _KERNEL /* - * If an error block is shared by two datasets it will be counted twice. For - * detailed message see spa_get_errlog_size() above. + * If an error block is shared by two datasets it will be counted twice. 
*/ static int process_error_log(spa_t *spa, uint64_t obj, void *uaddr, uint64_t *count) @@ -884,14 +816,11 @@ process_error_log(spa_t *spa, uint64_t obj, void *uaddr, uint64_t *count) zbookmark_phys_t zb; name_to_bookmark(za.za_name, &zb); - if (copyout(&zb, (char *)uaddr + - (*count - 1) * sizeof (zbookmark_phys_t), - sizeof (zbookmark_phys_t)) != 0) { + int error = copyout_entry(&zb, uaddr, count); + if (error != 0) { zap_cursor_fini(&zc); - return (SET_ERROR(EFAULT)); + return (error); } - *count -= 1; - } zap_cursor_fini(&zc); return (0); @@ -914,7 +843,7 @@ process_error_log(spa_t *spa, uint64_t obj, void *uaddr, uint64_t *count) zbookmark_err_phys_t head_ds_block; name_to_errphys(head_ds_attr.za_name, &head_ds_block); int error = process_error_block(spa, head_ds, - &head_ds_block, count, uaddr, B_FALSE); + &head_ds_block, uaddr, count); if (error != 0) { zap_cursor_fini(&head_ds_cursor); @@ -936,16 +865,11 @@ process_error_list(spa_t *spa, avl_tree_t *list, void *uaddr, uint64_t *count) if (!spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) { for (se = avl_first(list); se != NULL; se = AVL_NEXT(list, se)) { - - if (*count == 0) - return (SET_ERROR(ENOMEM)); - - if (copyout(&se->se_bookmark, (char *)uaddr + - (*count - 1) * sizeof (zbookmark_phys_t), - sizeof (zbookmark_phys_t)) != 0) - return (SET_ERROR(EFAULT)); - - *count -= 1; + int error = + copyout_entry(&se->se_bookmark, uaddr, count); + if (error != 0) { + return (error); + } } return (0); } @@ -963,7 +887,7 @@ process_error_list(spa_t *spa, avl_tree_t *list, void *uaddr, uint64_t *count) if (!error) error = process_error_block(spa, head_ds_obj, &zep, - count, uaddr, B_FALSE); + uaddr, count); if (error) return (error); } @@ -988,6 +912,12 @@ spa_get_errlog(spa_t *spa, void *uaddr, uint64_t *count) int ret = 0; #ifdef _KERNEL + /* + * The pool config lock is needed to hold a dataset_t via (among other + * places) process_error_list() -> get_head_and_birth_txg(), and lock + * ordering requires that 
we get it before the spa_errlog_lock. + */ + dsl_pool_config_enter(spa->spa_dsl_pool, FTAG); mutex_enter(&spa->spa_errlog_lock); ret = process_error_log(spa, spa->spa_errlog_scrub, uaddr, count); @@ -1006,6 +936,7 @@ spa_get_errlog(spa_t *spa, void *uaddr, uint64_t *count) mutex_exit(&spa->spa_errlist_lock); mutex_exit(&spa->spa_errlog_lock); + dsl_pool_config_exit(spa->spa_dsl_pool, FTAG); #else (void) spa, (void) uaddr, (void) count; #endif @@ -1174,6 +1105,13 @@ spa_errlog_sync(spa_t *spa, uint64_t txg) spa->spa_scrub_finished = B_FALSE; mutex_exit(&spa->spa_errlist_lock); + + /* + * The pool config lock is needed to hold a dataset_t via + * sync_error_list() -> get_head_and_birth_txg(), and lock ordering + * requires that we get it before the spa_errlog_lock. + */ + dsl_pool_config_enter(spa->spa_dsl_pool, FTAG); mutex_enter(&spa->spa_errlog_lock); tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); @@ -1218,6 +1156,7 @@ spa_errlog_sync(spa_t *spa, uint64_t txg) dmu_tx_commit(tx); mutex_exit(&spa->spa_errlog_lock); + dsl_pool_config_exit(spa->spa_dsl_pool, FTAG); } static void @@ -1354,7 +1293,7 @@ spa_swap_errlog(spa_t *spa, uint64_t new_head_ds, uint64_t old_head_ds, #if defined(_KERNEL) /* error handling */ EXPORT_SYMBOL(spa_log_error); -EXPORT_SYMBOL(spa_get_errlog_size); +EXPORT_SYMBOL(spa_approx_errlog_size); EXPORT_SYMBOL(spa_get_errlog); EXPORT_SYMBOL(spa_errlog_rotate); EXPORT_SYMBOL(spa_errlog_drain); diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c index 891744261014..8f3e461bae7b 100644 --- a/sys/contrib/openzfs/module/zfs/vdev.c +++ b/sys/contrib/openzfs/module/zfs/vdev.c @@ -389,6 +389,31 @@ vdev_get_nparity(vdev_t *vd) return (nparity); } +static int +vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value) +{ + spa_t *spa = vd->vdev_spa; + objset_t *mos = spa->spa_meta_objset; + uint64_t objid; + int err; + + if (vd->vdev_top_zap != 0) { + objid = vd->vdev_top_zap; + } else if (vd->vdev_leaf_zap 
!= 0) { + objid = vd->vdev_leaf_zap; + } else { + return (EINVAL); + } + + err = zap_lookup(mos, objid, vdev_prop_to_name(prop), + sizeof (uint64_t), 1, value); + + if (err == ENOENT) + *value = vdev_prop_default_numeric(prop); + + return (err); +} + /* * Get the number of data disks for a top-level vdev. */ @@ -642,6 +667,14 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops) zfs_ratelimit_init(&vd->vdev_checksum_rl, &zfs_checksum_events_per_second, 1); + /* + * Default Thresholds for tuning ZED + */ + vd->vdev_checksum_n = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_N); + vd->vdev_checksum_t = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_T); + vd->vdev_io_n = vdev_prop_default_numeric(VDEV_PROP_IO_N); + vd->vdev_io_t = vdev_prop_default_numeric(VDEV_PROP_IO_T); + list_link_init(&vd->vdev_config_dirty_node); list_link_init(&vd->vdev_state_dirty_node); list_link_init(&vd->vdev_initialize_node); @@ -3597,6 +3630,39 @@ vdev_load(vdev_t *vd) } } + if (vd->vdev_top_zap != 0 || vd->vdev_leaf_zap != 0) { + uint64_t zapobj; + + if (vd->vdev_top_zap != 0) + zapobj = vd->vdev_top_zap; + else + zapobj = vd->vdev_leaf_zap; + + error = vdev_prop_get_int(vd, VDEV_PROP_CHECKSUM_N, + &vd->vdev_checksum_n); + if (error && error != ENOENT) + vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) " + "failed [error=%d]", (u_longlong_t)zapobj, error); + + error = vdev_prop_get_int(vd, VDEV_PROP_CHECKSUM_T, + &vd->vdev_checksum_t); + if (error && error != ENOENT) + vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) " + "failed [error=%d]", (u_longlong_t)zapobj, error); + + error = vdev_prop_get_int(vd, VDEV_PROP_IO_N, + &vd->vdev_io_n); + if (error && error != ENOENT) + vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) " + "failed [error=%d]", (u_longlong_t)zapobj, error); + + error = vdev_prop_get_int(vd, VDEV_PROP_IO_T, + &vd->vdev_io_t); + if (error && error != ENOENT) + vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) " + "failed [error=%d]", (u_longlong_t)zapobj, 
error); + } + /* * If this is a top-level vdev, initialize its metaslabs. */ @@ -5736,6 +5802,34 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) } vd->vdev_failfast = intval & 1; break; + case VDEV_PROP_CHECKSUM_N: + if (nvpair_value_uint64(elem, &intval) != 0) { + error = EINVAL; + break; + } + vd->vdev_checksum_n = intval; + break; + case VDEV_PROP_CHECKSUM_T: + if (nvpair_value_uint64(elem, &intval) != 0) { + error = EINVAL; + break; + } + vd->vdev_checksum_t = intval; + break; + case VDEV_PROP_IO_N: + if (nvpair_value_uint64(elem, &intval) != 0) { + error = EINVAL; + break; + } + vd->vdev_io_n = intval; + break; + case VDEV_PROP_IO_T: + if (nvpair_value_uint64(elem, &intval) != 0) { + error = EINVAL; + break; + } + vd->vdev_io_t = intval; + break; default: /* Most processing is done in vdev_props_set_sync */ break; @@ -6025,28 +6119,25 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) continue; /* Numeric Properites */ case VDEV_PROP_ALLOCATING: - src = ZPROP_SRC_LOCAL; - strval = NULL; - - err = zap_lookup(mos, objid, nvpair_name(elem), - sizeof (uint64_t), 1, &intval); - if (err == ENOENT) { - intval = - vdev_prop_default_numeric(prop); - err = 0; - } else if (err) - break; - if (intval == vdev_prop_default_numeric(prop)) - src = ZPROP_SRC_DEFAULT; - /* Leaf vdevs cannot have this property */ if (vd->vdev_mg == NULL && vd->vdev_top != NULL) { src = ZPROP_SRC_NONE; intval = ZPROP_BOOLEAN_NA; + } else { + err = vdev_prop_get_int(vd, prop, + &intval); + if (err && err != ENOENT) + break; + + if (intval == + vdev_prop_default_numeric(prop)) + src = ZPROP_SRC_DEFAULT; + else + src = ZPROP_SRC_LOCAL; } - vdev_prop_add_list(outnvl, propname, strval, + vdev_prop_add_list(outnvl, propname, NULL, intval, src); break; case VDEV_PROP_FAILFAST: @@ -6068,6 +6159,22 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) vdev_prop_add_list(outnvl, propname, strval, intval, src); break; + case VDEV_PROP_CHECKSUM_N: + case 
VDEV_PROP_CHECKSUM_T: + case VDEV_PROP_IO_N: + case VDEV_PROP_IO_T: + err = vdev_prop_get_int(vd, prop, &intval); + if (err && err != ENOENT) + break; + + if (intval == vdev_prop_default_numeric(prop)) + src = ZPROP_SRC_DEFAULT; + else + src = ZPROP_SRC_LOCAL; + + vdev_prop_add_list(outnvl, propname, NULL, + intval, src); + break; /* Text Properties */ case VDEV_PROP_COMMENT: /* Exists in the ZAP below */ diff --git a/sys/contrib/openzfs/module/zfs/vdev_indirect.c b/sys/contrib/openzfs/module/zfs/vdev_indirect.c index b70e8edfafd3..8c11a574ae86 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_indirect.c +++ b/sys/contrib/openzfs/module/zfs/vdev_indirect.c @@ -270,7 +270,7 @@ typedef struct indirect_split { */ indirect_child_t *is_good_child; - indirect_child_t is_child[1]; /* variable-length */ + indirect_child_t is_child[]; } indirect_split_t; /* diff --git a/sys/contrib/openzfs/module/zfs/vdev_raidz_math_powerpc_altivec_common.h b/sys/contrib/openzfs/module/zfs/vdev_raidz_math_powerpc_altivec_common.h index 46d42c5e2417..f76eb47a9c66 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_raidz_math_powerpc_altivec_common.h +++ b/sys/contrib/openzfs/module/zfs/vdev_raidz_math_powerpc_altivec_common.h @@ -26,10 +26,6 @@ #include #include -#ifdef __linux__ -#define __asm __asm__ __volatile__ -#endif - #define _REG_CNT(_0, _1, _2, _3, _4, _5, _6, _7, N, ...) N #define REG_CNT(r...) 
_REG_CNT(r, 8, 7, 6, 5, 4, 3, 2, 1) @@ -142,7 +138,7 @@ typedef struct v { { \ switch (REG_CNT(r)) { \ case 8: \ - __asm( \ + __asm__ __volatile__( \ "lvx 21,0,%[SRC0]\n" \ "lvx 20,0,%[SRC1]\n" \ "lvx 19,0,%[SRC2]\n" \ @@ -172,7 +168,7 @@ typedef struct v { : "v18", "v19", "v20", "v21"); \ break; \ case 4: \ - __asm( \ + __asm__ __volatile__( \ "lvx 21,0,%[SRC0]\n" \ "lvx 20,0,%[SRC1]\n" \ "lvx 19,0,%[SRC2]\n" \ @@ -189,7 +185,7 @@ typedef struct v { : "v18", "v19", "v20", "v21"); \ break; \ case 2: \ - __asm( \ + __asm__ __volatile__( \ "lvx 21,0,%[SRC0]\n" \ "lvx 20,0,%[SRC1]\n" \ "vxor " VR0(r) "," VR0(r) ",21\n" \ @@ -208,7 +204,7 @@ typedef struct v { { \ switch (REG_CNT(r)) { \ case 8: \ - __asm( \ + __asm__ __volatile__( \ "vxor " VR4(r) "," VR4(r) "," VR0(r) "\n" \ "vxor " VR5(r) "," VR5(r) "," VR1(r) "\n" \ "vxor " VR6(r) "," VR6(r) "," VR2(r) "\n" \ @@ -217,7 +213,7 @@ typedef struct v { : RVR0(r), RVR1(r), RVR2(r), RVR3(r)); \ break; \ case 4: \ - __asm( \ + __asm__ __volatile__( \ "vxor " VR2(r) "," VR2(r) "," VR0(r) "\n" \ "vxor " VR3(r) "," VR3(r) "," VR1(r) "\n" \ : UVR2(r), UVR3(r) \ @@ -232,7 +228,7 @@ typedef struct v { { \ switch (REG_CNT(r)) { \ case 8: \ - __asm( \ + __asm__ __volatile__( \ "vxor " VR0(r) "," VR0(r) "," VR0(r) "\n" \ "vxor " VR1(r) "," VR1(r) "," VR1(r) "\n" \ "vxor " VR2(r) "," VR2(r) "," VR2(r) "\n" \ @@ -245,7 +241,7 @@ typedef struct v { WVR4(r), WVR5(r), WVR6(r), WVR7(r)); \ break; \ case 4: \ - __asm( \ + __asm__ __volatile__( \ "vxor " VR0(r) "," VR0(r) "," VR0(r) "\n" \ "vxor " VR1(r) "," VR1(r) "," VR1(r) "\n" \ "vxor " VR2(r) "," VR2(r) "," VR2(r) "\n" \ @@ -253,7 +249,7 @@ typedef struct v { : WVR0(r), WVR1(r), WVR2(r), WVR3(r)); \ break; \ case 2: \ - __asm( \ + __asm__ __volatile__( \ "vxor " VR0(r) "," VR0(r) "," VR0(r) "\n" \ "vxor " VR1(r) "," VR1(r) "," VR1(r) "\n" \ : WVR0(r), WVR1(r)); \ @@ -267,7 +263,7 @@ typedef struct v { { \ switch (REG_CNT(r)) { \ case 8: \ - __asm( \ + __asm__ __volatile__( \ "vor " 
VR4(r) "," VR0(r) "," VR0(r) "\n" \ "vor " VR5(r) "," VR1(r) "," VR1(r) "\n" \ "vor " VR6(r) "," VR2(r) "," VR2(r) "\n" \ @@ -276,7 +272,7 @@ typedef struct v { : RVR0(r), RVR1(r), RVR2(r), RVR3(r)); \ break; \ case 4: \ - __asm( \ + __asm__ __volatile__( \ "vor " VR2(r) "," VR0(r) "," VR0(r) "\n" \ "vor " VR3(r) "," VR1(r) "," VR1(r) "\n" \ : WVR2(r), WVR3(r) \ @@ -291,7 +287,7 @@ typedef struct v { { \ switch (REG_CNT(r)) { \ case 8: \ - __asm( \ + __asm__ __volatile__( \ "lvx " VR0(r) " ,0,%[SRC0]\n" \ "lvx " VR1(r) " ,0,%[SRC1]\n" \ "lvx " VR2(r) " ,0,%[SRC2]\n" \ @@ -312,7 +308,7 @@ typedef struct v { [SRC7] "r" ((OFFSET(src, 112)))); \ break; \ case 4: \ - __asm( \ + __asm__ __volatile__( \ "lvx " VR0(r) " ,0,%[SRC0]\n" \ "lvx " VR1(r) " ,0,%[SRC1]\n" \ "lvx " VR2(r) " ,0,%[SRC2]\n" \ @@ -324,7 +320,7 @@ typedef struct v { [SRC3] "r" ((OFFSET(src, 48)))); \ break; \ case 2: \ - __asm( \ + __asm__ __volatile__( \ "lvx " VR0(r) " ,0,%[SRC0]\n" \ "lvx " VR1(r) " ,0,%[SRC1]\n" \ : WVR0(r), WVR1(r) \ @@ -340,7 +336,7 @@ typedef struct v { { \ switch (REG_CNT(r)) { \ case 8: \ - __asm( \ + __asm__ __volatile__( \ "stvx " VR0(r) " ,0,%[DST0]\n" \ "stvx " VR1(r) " ,0,%[DST1]\n" \ "stvx " VR2(r) " ,0,%[DST2]\n" \ @@ -362,7 +358,7 @@ typedef struct v { : "memory"); \ break; \ case 4: \ - __asm( \ + __asm__ __volatile__( \ "stvx " VR0(r) " ,0,%[DST0]\n" \ "stvx " VR1(r) " ,0,%[DST1]\n" \ "stvx " VR2(r) " ,0,%[DST2]\n" \ @@ -375,7 +371,7 @@ typedef struct v { : "memory"); \ break; \ case 2: \ - __asm( \ + __asm__ __volatile__( \ "stvx " VR0(r) " ,0,%[DST0]\n" \ "stvx " VR1(r) " ,0,%[DST1]\n" \ : : [DST0] "r" ((OFFSET(dst, 0))), \ @@ -400,7 +396,7 @@ typedef struct v { #define MUL2_SETUP() \ { \ - __asm( \ + __asm__ __volatile__( \ "vspltisb " VR(16) ",14\n" \ "vspltisb " VR(17) ",15\n" \ "vaddubm " VR(16) "," VR(17) "," VR(16) "\n" \ @@ -412,7 +408,7 @@ typedef struct v { { \ switch (REG_CNT(r)) { \ case 4: \ - __asm( \ + __asm__ __volatile__( \ "vcmpgtsb 19," VR(17) "," 
VR0(r) "\n" \ "vcmpgtsb 18," VR(17) "," VR1(r) "\n" \ "vcmpgtsb 21," VR(17) "," VR2(r) "\n" \ @@ -434,7 +430,7 @@ typedef struct v { : "v18", "v19", "v20", "v21"); \ break; \ case 2: \ - __asm( \ + __asm__ __volatile__( \ "vcmpgtsb 19," VR(17) "," VR0(r) "\n" \ "vcmpgtsb 18," VR(17) "," VR1(r) "\n" \ "vand 19,19," VR(16) "\n" \ @@ -478,7 +474,7 @@ typedef struct v { { \ switch (REG_CNT(r)) { \ case 2: \ - __asm( \ + __asm__ __volatile__( \ /* lts for upper part */ \ "vspltisb 15,15\n" \ "lvx 10,0,%[lt0]\n" \ diff --git a/sys/contrib/openzfs/module/zfs/vdev_removal.c b/sys/contrib/openzfs/module/zfs/vdev_removal.c index 53592dbfdfa0..aaa88eb89e10 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_removal.c +++ b/sys/contrib/openzfs/module/zfs/vdev_removal.c @@ -1168,11 +1168,11 @@ spa_vdev_copy_segment(vdev_t *vd, range_tree_t *segs, metaslab_class_t *mc = mg->mg_class; if (mc->mc_groups == 0) mc = spa_normal_class(spa); - int error = metaslab_alloc_dva(spa, mc, size, &dst, 0, NULL, txg, 0, - zal, 0); + int error = metaslab_alloc_dva(spa, mc, size, &dst, 0, NULL, txg, + METASLAB_DONT_THROTTLE, zal, 0); if (error == ENOSPC && mc != spa_normal_class(spa)) { error = metaslab_alloc_dva(spa, spa_normal_class(spa), size, - &dst, 0, NULL, txg, 0, zal, 0); + &dst, 0, NULL, txg, METASLAB_DONT_THROTTLE, zal, 0); } if (error != 0) return (error); diff --git a/sys/contrib/openzfs/module/zfs/zap.c b/sys/contrib/openzfs/module/zfs/zap.c index 7ba5ad9ea5af..dde05d7005c2 100644 --- a/sys/contrib/openzfs/module/zfs/zap.c +++ b/sys/contrib/openzfs/module/zfs/zap.c @@ -946,9 +946,9 @@ fzap_length(zap_name_t *zn, if (err != 0) goto out; - if (integer_size != 0) + if (integer_size != NULL) *integer_size = zeh.zeh_integer_size; - if (num_integers != 0) + if (num_integers != NULL) *num_integers = zeh.zeh_num_integers; out: zap_put_leaf(l); diff --git a/sys/contrib/openzfs/module/zfs/zap_micro.c b/sys/contrib/openzfs/module/zfs/zap_micro.c index 606f426404cc..d6ad8b2b8bc5 100644 --- 
a/sys/contrib/openzfs/module/zfs/zap_micro.c +++ b/sys/contrib/openzfs/module/zfs/zap_micro.c @@ -41,6 +41,8 @@ #include #endif +int zap_micro_max_size = MZAP_MAX_BLKSZ; + static int mzap_upgrade(zap_t **zapp, const void *tag, dmu_tx_t *tx, zap_flags_t flags); @@ -568,7 +570,7 @@ zap_lockdir_impl(dmu_buf_t *db, const void *tag, dmu_tx_t *tx, if (zap->zap_ismicro && tx && adding && zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) { uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE; - if (newsz > MZAP_MAX_BLKSZ) { + if (newsz > zap_micro_max_size) { dprintf("upgrading obj %llu: num_entries=%u\n", (u_longlong_t)obj, zap->zap_m.zap_num_entries); *zapp = zap; @@ -1724,4 +1726,8 @@ EXPORT_SYMBOL(zap_cursor_advance); EXPORT_SYMBOL(zap_cursor_serialize); EXPORT_SYMBOL(zap_cursor_init_serialized); EXPORT_SYMBOL(zap_get_stats); + +/* CSTYLED */ +ZFS_MODULE_PARAM(zfs, , zap_micro_max_size, INT, ZMOD_RW, + "Maximum micro ZAP size, before converting to a fat ZAP, in bytes"); #endif diff --git a/sys/contrib/openzfs/module/zfs/zcp.c b/sys/contrib/openzfs/module/zfs/zcp.c index 5ebf1bbbc8cc..89ed4f91faa3 100644 --- a/sys/contrib/openzfs/module/zfs/zcp.c +++ b/sys/contrib/openzfs/module/zfs/zcp.c @@ -958,12 +958,12 @@ zcp_eval_impl(dmu_tx_t *tx, zcp_run_info_t *ri) } static void -zcp_pool_error(zcp_run_info_t *ri, const char *poolname) +zcp_pool_error(zcp_run_info_t *ri, const char *poolname, int error) { ri->zri_result = SET_ERROR(ECHRNG); lua_settop(ri->zri_state, 0); - (void) lua_pushfstring(ri->zri_state, "Could not open pool: %s", - poolname); + (void) lua_pushfstring(ri->zri_state, "Could not open pool: %s " + "errno: %d", poolname, error); zcp_convert_return_values(ri->zri_state, ri->zri_outnvl, ZCP_RET_ERROR, &ri->zri_result); @@ -1013,7 +1013,7 @@ zcp_eval_open(zcp_run_info_t *ri, const char *poolname) error = dsl_pool_hold(poolname, FTAG, &dp); if (error != 0) { - zcp_pool_error(ri, poolname); + zcp_pool_error(ri, poolname, error); return; } @@ -1159,7 +1159,7 @@ 
zcp_eval(const char *poolname, const char *program, boolean_t sync, err = dsl_sync_task_sig(poolname, NULL, zcp_eval_sync, zcp_eval_sig, &runinfo, 0, ZFS_SPACE_CHECK_ZCP_EVAL); if (err != 0) - zcp_pool_error(&runinfo, poolname); + zcp_pool_error(&runinfo, poolname, err); } else { zcp_eval_open(&runinfo, poolname); } diff --git a/sys/contrib/openzfs/module/zfs/zfs_chksum.c b/sys/contrib/openzfs/module/zfs/zfs_chksum.c index 4a9a36d87e66..91247f29278f 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_chksum.c +++ b/sys/contrib/openzfs/module/zfs/zfs_chksum.c @@ -251,7 +251,7 @@ chksum_benchmark(void) /* space for the benchmark times */ chksum_stat_cnt = 4; chksum_stat_cnt += blake3_impl_getcnt(); - chksum_stat_data = (chksum_stat_t *)kmem_zalloc( + chksum_stat_data = kmem_zalloc( sizeof (chksum_stat_t) * chksum_stat_cnt, KM_SLEEP); /* edonr - needs to be the first one here (slow CPU check) */ diff --git a/sys/contrib/openzfs/module/zfs/zfs_fm.c b/sys/contrib/openzfs/module/zfs/zfs_fm.c index fd0dc7d69bf8..7169e49ac46a 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_fm.c +++ b/sys/contrib/openzfs/module/zfs/zfs_fm.c @@ -200,6 +200,42 @@ recent_events_compare(const void *a, const void *b) return (0); } +/* + * workaround: vdev properties don't have inheritance + */ +static uint64_t +vdev_prop_get_inherited(vdev_t *vd, vdev_prop_t prop) +{ + uint64_t propdef, propval; + + propdef = vdev_prop_default_numeric(prop); + switch (prop) { + case VDEV_PROP_CHECKSUM_N: + propval = vd->vdev_checksum_n; + break; + case VDEV_PROP_CHECKSUM_T: + propval = vd->vdev_checksum_t; + break; + case VDEV_PROP_IO_N: + propval = vd->vdev_io_n; + break; + case VDEV_PROP_IO_T: + propval = vd->vdev_io_t; + break; + default: + propval = propdef; + break; + } + + if (propval != propdef) + return (propval); + + if (vd->vdev_parent == NULL) + return (propdef); + + return (vdev_prop_get_inherited(vd->vdev_parent, prop)); +} + static void zfs_ereport_schedule_cleaner(void); /* @@ -662,6 +698,49 @@ 
zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, DATA_TYPE_UINT64, zb->zb_blkid, NULL); } + /* + * Payload for tuning the zed + */ + if (vd != NULL && strcmp(subclass, FM_EREPORT_ZFS_CHECKSUM) == 0) { + uint64_t cksum_n, cksum_t; + + cksum_n = vdev_prop_get_inherited(vd, VDEV_PROP_CHECKSUM_N); + if (cksum_n != vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_N)) + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_N, + DATA_TYPE_UINT64, + cksum_n, + NULL); + + cksum_t = vdev_prop_get_inherited(vd, VDEV_PROP_CHECKSUM_T); + if (cksum_t != vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_T)) + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_T, + DATA_TYPE_UINT64, + cksum_t, + NULL); + } + + if (vd != NULL && strcmp(subclass, FM_EREPORT_ZFS_IO) == 0) { + uint64_t io_n, io_t; + + io_n = vdev_prop_get_inherited(vd, VDEV_PROP_IO_N); + if (io_n != vdev_prop_default_numeric(VDEV_PROP_IO_N)) + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_VDEV_IO_N, + DATA_TYPE_UINT64, + io_n, + NULL); + + io_t = vdev_prop_get_inherited(vd, VDEV_PROP_IO_T); + if (io_t != vdev_prop_default_numeric(VDEV_PROP_IO_T)) + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_VDEV_IO_T, + DATA_TYPE_UINT64, + io_t, + NULL); + } + mutex_exit(&spa->spa_errlist_lock); *ereport_out = ereport; diff --git a/sys/contrib/openzfs/module/zfs/zfs_fuid.c b/sys/contrib/openzfs/module/zfs/zfs_fuid.c index 35466c486ab3..e2e066b0e99b 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_fuid.c +++ b/sys/contrib/openzfs/module/zfs/zfs_fuid.c @@ -622,7 +622,7 @@ zfs_fuid_create(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr, rid = FUID_RID(fuidp->z_fuid_group); idx = FUID_INDEX(fuidp->z_fuid_group); break; - }; + } domain = fuidp->z_domain_table[idx - 1]; } else { if (type == ZFS_OWNER || type == ZFS_ACE_USER) diff --git a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c index a5168b937588..a1717d4d6038 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c +++ 
b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c @@ -1081,7 +1081,7 @@ zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) (void) innvl; int error; - if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0) + if (secpolicy_sys_config(cr, B_FALSE) == 0) return (0); error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr); @@ -1230,8 +1230,8 @@ zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) */ int error; - if ((error = zfs_secpolicy_write_perms(zc->zc_name, - ZFS_DELEG_PERM_DIFF, cr)) == 0) + if (zfs_secpolicy_write_perms(zc->zc_name, + ZFS_DELEG_PERM_DIFF, cr) == 0) return (0); error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr); @@ -1279,8 +1279,7 @@ get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp) packed = vmem_alloc(size, KM_SLEEP); - if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size, - iflag)) != 0) { + if (ddi_copyin((void *)(uintptr_t)nvl, packed, size, iflag) != 0) { vmem_free(packed, size); return (SET_ERROR(EFAULT)); } @@ -2039,7 +2038,7 @@ zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os) dmu_objset_fast_stat(os, &zc->zc_objset_stats); - if (zc->zc_nvlist_dst != 0 && + if (!zc->zc_simple && zc->zc_nvlist_dst != 0 && (error = dsl_prop_get_all(os, &nv)) == 0) { dmu_objset_stats(os, nv); /* @@ -2326,8 +2325,7 @@ zfs_ioc_snapshot_list_next(zfs_cmd_t *zc) } if (zc->zc_simple) { - zc->zc_objset_stats.dds_creation_txg = - dsl_get_creationtxg(ds); + dsl_dataset_fast_stat(ds, &zc->zc_objset_stats); dsl_dataset_rele(ds, FTAG); break; } @@ -2683,7 +2681,6 @@ zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl, pair = NULL; while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) { const char *propname = nvpair_name(pair); - err = 0; propval = pair; if (nvpair_type(pair) == DATA_TYPE_NVLIST) { @@ -3096,7 +3093,7 @@ zfs_ioc_set_fsacl(zfs_cmd_t *zc) /* * Verify nvlist is constructed correctly */ - if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) { + if 
(zfs_deleg_verify_nvlist(fsaclnv) != 0) { nvlist_free(fsaclnv); return (SET_ERROR(EINVAL)); } @@ -5696,17 +5693,12 @@ zfs_ioc_error_log(zfs_cmd_t *zc) { spa_t *spa; int error; - uint64_t count = zc->zc_nvlist_dst_size; if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst, - &count); - if (error == 0) - zc->zc_nvlist_dst_size = count; - else - zc->zc_nvlist_dst_size = spa_get_errlog_size(spa); + &zc->zc_nvlist_dst_size); spa_close(spa, FTAG); diff --git a/sys/contrib/openzfs/module/zfs/zfs_vnops.c b/sys/contrib/openzfs/module/zfs/zfs_vnops.c index 45ecb0773260..0c392b9da0fb 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_vnops.c +++ b/sys/contrib/openzfs/module/zfs/zfs_vnops.c @@ -876,7 +876,7 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf, return (SET_ERROR(ENOENT)); } - zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); + zgd = kmem_zalloc(sizeof (zgd_t), KM_SLEEP); zgd->zgd_lwb = lwb; zgd->zgd_private = zp; diff --git a/sys/contrib/openzfs/module/zfs/zil.c b/sys/contrib/openzfs/module/zfs/zil.c index 02e6f4b83b9c..2e017992fd9a 100644 --- a/sys/contrib/openzfs/module/zfs/zil.c +++ b/sys/contrib/openzfs/module/zfs/zil.c @@ -92,6 +92,14 @@ */ static uint_t zfs_commit_timeout_pct = 5; +/* + * Minimal time we care to delay commit waiting for more ZIL records. + * At least FreeBSD kernel can't sleep for less than 2us at its best. + * So requests to sleep for less then 5us is a waste of CPU time with + * a risk of significant log latency increase due to oversleep. + */ +static uint64_t zil_min_commit_timeout = 5000; + /* * See zil.h for more information about these fields. 
*/ @@ -1295,7 +1303,8 @@ zil_lwb_flush_vdevs_done(zio_t *zio) lwb->lwb_buf = NULL; ASSERT3U(lwb->lwb_issued_timestamp, >, 0); - zilog->zl_last_lwb_latency = gethrtime() - lwb->lwb_issued_timestamp; + zilog->zl_last_lwb_latency = (zilog->zl_last_lwb_latency * 3 + + gethrtime() - lwb->lwb_issued_timestamp) / 4; lwb->lwb_root_zio = NULL; @@ -2463,8 +2472,9 @@ zil_process_commit_list(zilog_t *zilog) spa_t *spa = zilog->zl_spa; list_t nolwb_itxs; list_t nolwb_waiters; - lwb_t *lwb; + lwb_t *lwb, *plwb; itx_t *itx; + boolean_t first = B_TRUE; ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock)); @@ -2491,6 +2501,9 @@ zil_process_commit_list(zilog_t *zilog) ASSERT3S(lwb->lwb_state, !=, LWB_STATE_ISSUED); ASSERT3S(lwb->lwb_state, !=, LWB_STATE_WRITE_DONE); ASSERT3S(lwb->lwb_state, !=, LWB_STATE_FLUSH_DONE); + first = (lwb->lwb_state != LWB_STATE_OPENED) && + ((plwb = list_prev(&zilog->zl_lwb_list, lwb)) == NULL || + plwb->lwb_state == LWB_STATE_FLUSH_DONE); } while ((itx = list_head(&zilog->zl_itx_commit_list)) != NULL) { @@ -2661,7 +2674,23 @@ zil_process_commit_list(zilog_t *zilog) * try and pack as many itxs into as few lwbs as * possible, without significantly impacting the latency * of each individual itx. + * + * If we had no already running or open LWBs, it can be + * the workload is single-threaded. And if the ZIL write + * latency is very small or if the LWB is almost full, it + * may be cheaper to bypass the delay. 
*/ + if (lwb->lwb_state == LWB_STATE_OPENED && first) { + hrtime_t sleep = zilog->zl_last_lwb_latency * + zfs_commit_timeout_pct / 100; + if (sleep < zil_min_commit_timeout || + lwb->lwb_sz - lwb->lwb_nused < lwb->lwb_sz / 8) { + lwb = zil_lwb_write_issue(zilog, lwb); + zilog->zl_cur_used = 0; + if (lwb == NULL) + zil_commit_writer_stall(zilog); + } + } } } @@ -3949,6 +3978,9 @@ EXPORT_SYMBOL(zil_kstat_values_update); ZFS_MODULE_PARAM(zfs, zfs_, commit_timeout_pct, UINT, ZMOD_RW, "ZIL block open timeout percentage"); +ZFS_MODULE_PARAM(zfs_zil, zil_, min_commit_timeout, U64, ZMOD_RW, + "Minimum delay we care for ZIL block commit"); + ZFS_MODULE_PARAM(zfs_zil, zil_, replay_disable, INT, ZMOD_RW, "Disable intent logging replay"); diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c index 9ae2458669f7..5d7ed6d582a2 100644 --- a/sys/contrib/openzfs/module/zfs/zio.c +++ b/sys/contrib/openzfs/module/zfs/zio.c @@ -2826,7 +2826,7 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc) * have a third copy. 
*/ gbh_copies = MIN(copies + 1, spa_max_replication(spa)); - if (gio->io_prop.zp_encrypt && gbh_copies >= SPA_DVAS_PER_BP) + if (BP_IS_ENCRYPTED(bp) && gbh_copies >= SPA_DVAS_PER_BP) gbh_copies = SPA_DVAS_PER_BP - 1; int flags = METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER; diff --git a/sys/contrib/openzfs/module/zfs/zvol.c b/sys/contrib/openzfs/module/zfs/zvol.c index 20578a8223b2..1511f763fd77 100644 --- a/sys/contrib/openzfs/module/zfs/zvol.c +++ b/sys/contrib/openzfs/module/zfs/zvol.c @@ -646,7 +646,7 @@ zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf, ASSERT3P(zio, !=, NULL); ASSERT3U(size, !=, 0); - zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); + zgd = kmem_zalloc(sizeof (zgd_t), KM_SLEEP); zgd->zgd_lwb = lwb; /* @@ -1076,7 +1076,7 @@ zvol_create_minors_cb(const char *dsname, void *arg) * traverse snapshots only, do not traverse children, * and skip the 'dsname' */ - error = dmu_objset_find(dsname, + (void) dmu_objset_find(dsname, zvol_create_snap_minor_cb, (void *)job, DS_FIND_SNAPSHOTS); } diff --git a/sys/contrib/openzfs/module/zstd/include/zstd_compat_wrapper.h b/sys/contrib/openzfs/module/zstd/include/zstd_compat_wrapper.h index 2c4baad27d4e..4e6561f31a68 100644 --- a/sys/contrib/openzfs/module/zstd/include/zstd_compat_wrapper.h +++ b/sys/contrib/openzfs/module/zstd/include/zstd_compat_wrapper.h @@ -73,11 +73,9 @@ #define FSE_buildDTable_raw zfs_FSE_buildDTable_raw #define FSE_buildDTable_rle zfs_FSE_buildDTable_rle #define FSE_buildDTable zfs_FSE_buildDTable -#define FSE_createDTable zfs_FSE_createDTable #define FSE_decompress_usingDTable zfs_FSE_decompress_usingDTable #define FSE_decompress_wksp zfs_FSE_decompress_wksp #define FSE_decompress zfs_FSE_decompress -#define FSE_freeDTable zfs_FSE_freeDTable /* lib/common/pool.o: */ #define POOL_add zfs_POOL_add diff --git a/sys/contrib/openzfs/module/zstd/lib/common/fse_decompress.c b/sys/contrib/openzfs/module/zstd/lib/common/fse_decompress.c index bcc2223ccc65..6b3205c63cc8 
100644 --- a/sys/contrib/openzfs/module/zstd/lib/common/fse_decompress.c +++ b/sys/contrib/openzfs/module/zstd/lib/common/fse_decompress.c @@ -56,17 +56,6 @@ /* Function templates */ -FSE_DTable* FSE_createDTable (unsigned tableLog) -{ - if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; - return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) ); -} - -void FSE_freeDTable (FSE_DTable* dt) -{ - free(dt); -} - size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) { void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */ diff --git a/sys/contrib/openzfs/module/zstd/zfs_zstd.c b/sys/contrib/openzfs/module/zstd/zfs_zstd.c index 7d031bbdbe31..76b5e2759f4f 100644 --- a/sys/contrib/openzfs/module/zstd/zfs_zstd.c +++ b/sys/contrib/openzfs/module/zstd/zfs_zstd.c @@ -793,9 +793,9 @@ create_fallback_mem(struct zstd_fallback_mem *mem, size_t size) static void __init zstd_mempool_init(void) { - zstd_mempool_cctx = (struct zstd_pool *) + zstd_mempool_cctx = kmem_zalloc(ZSTD_POOL_MAX * sizeof (struct zstd_pool), KM_SLEEP); - zstd_mempool_dctx = (struct zstd_pool *) + zstd_mempool_dctx = kmem_zalloc(ZSTD_POOL_MAX * sizeof (struct zstd_pool), KM_SLEEP); for (int i = 0; i < ZSTD_POOL_MAX; i++) { diff --git a/sys/contrib/openzfs/scripts/debian-packaging.sh b/sys/contrib/openzfs/scripts/debian-packaging.sh deleted file mode 100755 index 9cd042fa44da..000000000000 --- a/sys/contrib/openzfs/scripts/debian-packaging.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/bash - -# -# This script can be used to invoke OpenZFS build from native Debian -# packaging. 
-# - -print_help () -{ - echo "Usage: $(basename $0) [OPTIONS]" - echo - echo "Options:" - echo " -b, --build Build OpenZFS from Debian Packaging" - echo " -c, --clean Clean the workspace" -} - -if [ "$#" -ne 1 ]; then - print_help - exit 1 -fi - -case $1 in - -b|--build) - cp -r contrib/debian debian - debuild -i -us -uc -b && fakeroot debian/rules override_dh_binary-modules - ;; - -c|--clean) - fakeroot debian/rules override_dh_auto_clean - rm -rf debian - ;; - *) - print_help - ;; -esac - -exit 0 diff --git a/sys/contrib/openzfs/tests/runfiles/common.run b/sys/contrib/openzfs/tests/runfiles/common.run index 1b42786e9d58..005c539fc89d 100644 --- a/sys/contrib/openzfs/tests/runfiles/common.run +++ b/sys/contrib/openzfs/tests/runfiles/common.run @@ -842,6 +842,7 @@ tests = ['recv_dedup', 'recv_dedup_encrypted_zvol', 'rsend_001_pos', 'send-c_recv_lz4_disabled', 'send-c_mixed_compression', 'send-c_stream_size_estimate', 'send-c_embedded_blocks', 'send-c_resume', 'send-cpL_varied_recsize', 'send-c_recv_dedup', 'send-L_toggle', + 'send_encrypted_incremental.ksh', 'send_encrypted_freeobjects', 'send_encrypted_hierarchy', 'send_encrypted_props', 'send_encrypted_truncated_files', 'send_freeobjects', 'send_realloc_files', 'send_realloc_encrypted_files', 'send_spill_block', 'send_holds', diff --git a/sys/contrib/openzfs/tests/runfiles/linux.run b/sys/contrib/openzfs/tests/runfiles/linux.run index 23292a4889a8..15755408b5ad 100644 --- a/sys/contrib/openzfs/tests/runfiles/linux.run +++ b/sys/contrib/openzfs/tests/runfiles/linux.run @@ -87,7 +87,7 @@ tags = ['functional', 'devices'] [tests/functional/events:Linux] tests = ['events_001_pos', 'events_002_pos', 'zed_rc_filter', 'zed_fd_spill', - 'zed_cksum_reported'] + 'zed_cksum_reported', 'zed_cksum_config', 'zed_io_config'] tags = ['functional', 'events'] [tests/functional/fadvise:Linux] diff --git a/sys/contrib/openzfs/tests/runfiles/sanity.run b/sys/contrib/openzfs/tests/runfiles/sanity.run index 51423966206f..449bf1c0f56a 
100644 --- a/sys/contrib/openzfs/tests/runfiles/sanity.run +++ b/sys/contrib/openzfs/tests/runfiles/sanity.run @@ -550,6 +550,7 @@ tests = ['recv_dedup', 'recv_dedup_encrypted_zvol', 'rsend_001_pos', 'rsend_014_pos', 'rsend_016_neg', 'send-c_verify_contents', 'send-c_volume', 'send-c_zstreamdump', 'send-c_recv_dedup', 'send-L_toggle', 'send_encrypted_hierarchy', 'send_encrypted_props', + 'send_encrypted_freeobjects', 'send_encrypted_truncated_files', 'send_freeobjects', 'send_holds', 'send_mixed_raw', 'send-wR_encrypted_zvol', 'send_partial_dataset', 'send_invalid'] @@ -627,4 +628,6 @@ tags = ['functional', 'zpool_influxdb'] [tests/functional/pyzfs] tests = ['pyzfs_unittest'] +pre = +post = tags = ['functional', 'pyzfs'] diff --git a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in index a2324d4b52ea..f3cfca912a57 100755 --- a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in +++ b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in @@ -194,6 +194,7 @@ elif sys.platform.startswith('linux'): # reasons listed above can be used. 
# maybe = { + 'append/threadsappend_001_pos': ['FAIL', 6136], 'chattr/setup': ['SKIP', exec_reason], 'crtime/crtime_001_pos': ['SKIP', statx_reason], 'cli_root/zdb/zdb_006_pos': ['FAIL', known_reason], @@ -224,6 +225,7 @@ maybe = { 'io/mmap': ['SKIP', fio_reason], 'largest_pool/largest_pool_001_pos': ['FAIL', known_reason], 'mmp/mmp_on_uberblocks': ['FAIL', known_reason], + 'pam/setup': ['SKIP', "pamtester might be not available"], 'pool_checkpoint/checkpoint_discard_busy': ['FAIL', 11946], 'projectquota/setup': ['SKIP', exec_reason], 'removal/removal_condense_export': ['FAIL', known_reason], @@ -235,13 +237,12 @@ maybe = { 'snapshot/snapshot_010_pos': ['FAIL', 7961], 'snapused/snapused_004_pos': ['FAIL', 5513], 'tmpfile/setup': ['SKIP', tmpfile_reason], - 'append/threadsappend_001_pos': ['FAIL', 6136], 'trim/setup': ['SKIP', trim_reason], 'upgrade/upgrade_projectquota_001_pos': ['SKIP', project_id_reason], 'user_namespace/setup': ['SKIP', user_ns_reason], 'userquota/setup': ['SKIP', exec_reason], + 'vdev_zaps/vdev_zaps_004_pos': ['FAIL', known_reason], 'zvol/zvol_ENOSPC/zvol_ENOSPC_001_pos': ['FAIL', 5848], - 'pam/setup': ['SKIP', "pamtester might be not available"], } if sys.platform.startswith('freebsd'): @@ -261,8 +262,11 @@ elif sys.platform.startswith('linux'): maybe.update({ 'cli_root/zfs_rename/zfs_rename_002_pos': ['FAIL', known_reason], 'cli_root/zpool_reopen/zpool_reopen_003_pos': ['FAIL', known_reason], - 'fault/auto_spare_shared': ['FAIL', 11889], + 'fault/auto_online_002_pos': ['FAIL', 11889], + 'fault/auto_spare_002_pos': ['FAIL', 11889], 'fault/auto_spare_multiple': ['FAIL', 11889], + 'fault/auto_spare_shared': ['FAIL', 11889], + 'fault/decompress_fault': ['FAIL', 11889], 'io/io_uring': ['SKIP', 'io_uring support required'], 'limits/filesystem_limit': ['SKIP', known_reason], 'limits/snapshot_limit': ['SKIP', known_reason], diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/mmapwrite.c b/sys/contrib/openzfs/tests/zfs-tests/cmd/mmapwrite.c index 
a18609898485..20a50085a227 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/cmd/mmapwrite.c +++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/mmapwrite.c @@ -52,6 +52,7 @@ */ #define NORMAL_WRITE_TH_NUM 2 +#define MAX_WRITE_BYTES 262144000 static void * normal_writer(void *filename) @@ -67,18 +68,25 @@ normal_writer(void *filename) } char buf = 'z'; - while (1) { + off_t bytes_written = 0; + + while (bytes_written < MAX_WRITE_BYTES) { write_num = write(fd, &buf, 1); if (write_num == 0) { err(1, "write failed!"); break; } - if (lseek(fd, page_size, SEEK_CUR) == -1) { + if ((bytes_written = lseek(fd, page_size, SEEK_CUR)) == -1) { err(1, "lseek failed on %s: %s", file_path, strerror(errno)); break; } } + + if (close(fd) != 0) + err(1, "failed to close file"); + + return (NULL); } static void * diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am index 144a848cf3fa..bbe94f9177ae 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am @@ -1367,8 +1367,10 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/events/events_001_pos.ksh \ functional/events/events_002_pos.ksh \ functional/events/setup.ksh \ + functional/events/zed_cksum_config.ksh \ functional/events/zed_cksum_reported.ksh \ functional/events/zed_fd_spill.ksh \ + functional/events/zed_io_config.ksh \ functional/events/zed_rc_filter.ksh \ functional/exec/cleanup.ksh \ functional/exec/exec_001_pos.ksh \ @@ -1711,6 +1713,11 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/rename_dirs/cleanup.ksh \ functional/rename_dirs/rename_dirs_001_pos.ksh \ functional/rename_dirs/setup.ksh \ + functional/renameat2/cleanup.ksh \ + functional/renameat2/setup.ksh \ + functional/renameat2/renameat2_exchange.ksh \ + functional/renameat2/renameat2_noreplace.ksh \ + functional/renameat2/renameat2_whiteout.ksh \ functional/replacement/attach_import.ksh \ 
functional/replacement/attach_multiple.ksh \ functional/replacement/attach_rebuild.ksh \ @@ -1789,7 +1796,6 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/rsend/send-c_incremental.ksh \ functional/rsend/send-c_lz4_disabled.ksh \ functional/rsend/send-c_mixed_compression.ksh \ - functional/rsend/send-cpL_varied_recsize.ksh \ functional/rsend/send-c_props.ksh \ functional/rsend/send-c_recv_dedup.ksh \ functional/rsend/send-c_recv_lz4_disabled.ksh \ @@ -1798,9 +1804,13 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/rsend/send-c_verify_contents.ksh \ functional/rsend/send-c_verify_ratio.ksh \ functional/rsend/send-c_volume.ksh \ + functional/rsend/send-c_zstream_recompress.ksh \ functional/rsend/send-c_zstreamdump.ksh \ + functional/rsend/send-cpL_varied_recsize.ksh \ functional/rsend/send_doall.ksh \ + functional/rsend/send_encrypted_incremental.ksh \ functional/rsend/send_encrypted_files.ksh \ + functional/rsend/send_encrypted_freeobjects.ksh \ functional/rsend/send_encrypted_hierarchy.ksh \ functional/rsend/send_encrypted_props.ksh \ functional/rsend/send_encrypted_truncated_files.ksh \ diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/acl/posix/posix_004_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/acl/posix/posix_004_pos.ksh index 7906f5063c81..ffb5b4db71b4 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/acl/posix/posix_004_pos.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/acl/posix/posix_004_pos.ksh @@ -35,6 +35,7 @@ # STRATEGY: # 1. Prepare an appropriate ACL on the test directory # 2. Change the owner of the directory +# 3. 
Reset and set the ACLs for test directory owned by the user # verify_runnable "both" @@ -44,6 +45,8 @@ log_must setfacl -d -m u:$ZFS_ACL_STAFF1:rwx $TESTDIR log_must setfacl -b $TESTDIR log_must chown $ZFS_ACL_STAFF1 $TESTDIR +log_must setfacl -b $TESTDIR +log_must setfacl -d -m u:$ZFS_ACL_STAFF1:rwx $TESTDIR log_must chown 0 $TESTDIR log_pass "chown works with POSIX ACLs" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_property/zfs_written_property_001_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_property/zfs_written_property_001_pos.ksh index f53a4ac71b68..fd3194fe9895 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_property/zfs_written_property_001_pos.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_property/zfs_written_property_001_pos.ksh @@ -163,6 +163,7 @@ before_clone=$(get_prop written $TESTPOOL/$TESTFS1) log_must zfs clone $TESTPOOL/$TESTFS1@snap1 $TESTPOOL/$TESTFS1/snap1.clone log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS1/snap1.clone/testfile bs=1M \ count=40 +sync_pool after_clone=$(get_prop written $TESTPOOL/$TESTFS1) within_percent $before_clone $after_clone 99.5 || \ log_fail "unexpected written for clone $before_clone $after_clone" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh index 8bd9a6854950..7e12d30d0e7e 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh @@ -41,6 +41,9 @@ verify_runnable "both" function cleanup { + datasetexists $TESTPOOL/encrypted && \ + destroy_dataset $TESTPOOL/encrypted -r + snapexists $snap && destroy_dataset $snap -f snapexists $snap2 && destroy_dataset 
$snap2 -f @@ -97,4 +100,15 @@ log_note "Verifying ZFS will not receive to an encrypted child when the" \ "parent key is unloaded" log_mustnot eval "zfs send $snap | zfs receive $TESTPOOL/$TESTFS1/c4" +# Verify that replication can override encryption properties +log_note "Verifying replication can override encryption properties for plain dataset" +typeset key_location="/$TESTPOOL/pkey1" +log_must eval "echo $passphrase > $key_location" +log_must eval "zfs send -R $snap2 | zfs recv -s -F -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=file://$key_location" \ + "-o mountpoint=none $TESTPOOL/encrypted" +log_must test "$(get_prop 'encryption' $TESTPOOL/encrypted)" != "off" +log_must test "$(get_prop 'keyformat' $TESTPOOL/encrypted)" == "passphrase" +log_must test "$(get_prop 'keylocation' $TESTPOOL/encrypted)" == "file://$key_location" + log_pass "ZFS can receive encrypted filesystems into child dataset" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_006_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_006_pos.ksh index c5dfb89394f4..d7ee161eb3be 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_006_pos.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_006_pos.ksh @@ -119,33 +119,33 @@ full_size=$(zfs send $full_snapshot 2>&1 | wc -c) incremental_size=$(zfs send $incremental_snapshot 2>&1 | wc -c) incremental_send=$(zfs send -i $full_snapshot $incremental_snapshot 2>&1 | wc -c) -log_note "verify zfs send -nv" -options="-nv" +log_note "verify zfs send -nvV" +options="-nvV" refer_size=$(get_prop refer $full_snapshot) estimate_size=$(get_estimate_size $full_snapshot $options) log_must verify_size_estimates $options $full_size -log_note "verify zfs send -Pnv" -options="-Pnv" +log_note "verify zfs send -PnvV" +options="-PnvV" estimate_size=$(get_estimate_size $full_snapshot $options) log_must 
verify_size_estimates $options $full_size -log_note "verify zfs send -nv for multiple snapshot send" -options="-nv" +log_note "verify zfs send -nvV for multiple snapshot send" +options="-nvV" refer_size=$(get_prop refer $incremental_snapshot) estimate_size=$(get_estimate_size $incremental_snapshot $options) log_must verify_size_estimates $options $incremental_size -log_note "verify zfs send -vPn for multiple snapshot send" -options="-vPn" +log_note "verify zfs send -vVPn for multiple snapshot send" +options="-vVPn" estimate_size=$(get_estimate_size $incremental_snapshot $options) log_must verify_size_estimates $options $incremental_size -log_note "verify zfs send -inv for incremental send" -options="-nvi" +log_note "verify zfs send -invV for incremental send" +options="-nvVi" refer_size=$(get_prop refer $incremental_snapshot) deduct_size=$(get_prop refer $full_snapshot) refer_size=$(echo "$refer_size - $deduct_size" | bc) @@ -155,8 +155,8 @@ log_must verify_size_estimates $options $incremental_send estimate_size=$(get_estimate_size $incremental_snapshot $options $full_bookmark) log_must verify_size_estimates $options $incremental_send -log_note "verify zfs send -ivPn for incremental send" -options="-vPni" +log_note "verify zfs send -ivVPn for incremental send" +options="-vVPni" estimate_size=$(get_estimate_size $incremental_snapshot $options $full_snapshot) log_must verify_size_estimates $options $incremental_send @@ -186,16 +186,16 @@ for ds in $datasets; do datasetexists $ds@snap64 || log_fail "Create $ds@snap64 snapshot fail." 
done recursive_size=$(zfs send -R $full_snapshot 2>&1 | wc -c) -log_note "verify zfs send -Rnv for recursive send" -options="-Rnv" +log_note "verify zfs send -RnvV for recursive send" +options="-RnvV" refer_size=$(get_prop refer $full_snapshot) refer_size=$(echo "$refer_size * 3" | bc) estimate_size=$(get_estimate_size $full_snapshot $options) log_must verify_size_estimates $options $recursive_size -log_note "verify zfs send -RvPn for recursive send" -options="-RvPn" +log_note "verify zfs send -RvVPn for recursive send" +options="-RvVPn" estimate_size=$(get_estimate_size $full_snapshot $options) log_must verify_size_estimates $options $recursive_size diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/zed_cksum_config.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/zed_cksum_config.ksh new file mode 100755 index 000000000000..5aae3d034660 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/zed_cksum_config.ksh @@ -0,0 +1,158 @@ +#!/bin/ksh -p +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2022, Klara Inc. +# + +# DESCRIPTION: +# Verify that vdev properties, checksum_n and checksum_t, work with ZED. +# +# STRATEGY: +# 1. 
Create a pool with single vdev +# 2. Set checksum_n/checksum_t to non-default values +# 3. Inject checksum errors +# 4. Verify that ZED degrades vdev +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/events/events_common.kshlib + +verify_runnable "both" + +MOUNTDIR="$TEST_BASE_DIR/checksum_mount" +FILEPATH="$MOUNTDIR/checksum_file" +VDEV="$TEST_BASE_DIR/vdevfile.$$" +POOL="checksum_pool" +FILESIZE="10M" + +function cleanup +{ + log_must zed_stop + + log_must zinject -c all + if poolexists $POOL ; then + destroy_pool $POOL + fi + log_must rm -fd $VDEV $MOUNTDIR +} + +log_onexit cleanup + +log_assert "Test ZED checksum_N and checksum_T configurability" + +function do_setup +{ + log_must zpool create -f -m $MOUNTDIR $POOL $VDEV + log_must zpool events -c + log_must truncate -s 0 $ZED_DEBUG_LOG + log_must zfs set compression=off $POOL + log_must zfs set primarycache=none $POOL + log_must zfs set recordsize=512 $POOL +} + +function do_clean +{ + log_must zinject -c all + log_must zpool destroy $POOL +} + +function must_degrade +{ + log_must wait_vdev_state $POOL $VDEV "DEGRADED" 60 +} + +function mustnot_degrade +{ + log_must file_wait $ZED_DEBUG_LOG 5 + log_must wait_vdev_state $POOL $VDEV "ONLINE" 60 +} + +# Test default settings of ZED: +# checksum_n=10 +# checksum_t=600 +# fire 10 events, should degrade. +function default_degrade +{ + do_setup + + log_must mkfile $FILESIZE $FILEPATH + log_must zinject -a -t data -e checksum -T read -f 100 $FILEPATH + + blk=0 + for _ in {1..10}; do + dd if=$FILEPATH of=/dev/null bs=1 count=1 skip=$blk 2>/dev/null + blk=$((blk+512)) + done + + must_degrade + + do_clean +} + +# Set checksum_t=1 +# fire 10 events over 2.5 seconds, should not degrade. 
+function checksum_t_no_degrade +{ + do_setup + + log_must zpool set checksum_t=1 $POOL $VDEV + log_must mkfile $FILESIZE $FILEPATH + log_must zinject -a -t data -e checksum -T read -f 100 $FILEPATH + + blk=0 + for _ in {1..10}; do + dd if=$FILEPATH of=/dev/null bs=1 count=1 skip=$blk 2>/dev/null + blk=$((blk+512)) + sleep 0.25 + done + + mustnot_degrade + + do_clean +} + +# Set checksum_n=1 +# fire 1 event, should degrade. +function checksum_n_degrade +{ + do_setup + + log_must zpool set checksum_n=1 $POOL $VDEV + log_must mkfile $FILESIZE $FILEPATH + log_must zinject -a -t data -e checksum -T read -f 100 $FILEPATH + + dd if=$FILEPATH of=/dev/null bs=1 count=1 2>/dev/null + + must_degrade + + do_clean +} + +log_must truncate -s $MINVDEVSIZE $VDEV +log_must mkdir -p $MOUNTDIR + +log_must zed_start +default_degrade +checksum_n_degrade +checksum_t_no_degrade + +log_pass "Test ZED checksum_N and checksum_T configurability" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/zed_io_config.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/zed_io_config.ksh new file mode 100755 index 000000000000..637f979fe8d1 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/zed_io_config.ksh @@ -0,0 +1,150 @@ +#!/bin/ksh -p +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2022, Klara Inc. +# + +# DESCRIPTION: +# Verify that vdev properties, io_n and io_t, work with ZED. +# +# STRATEGY: +# 1. Create a mirrored pool. +# 2. Set io_n/io_t to non-default values +# 3. Inject io errors +# 4. Verify that ZED degrades vdev + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/events/events_common.kshlib + +verify_runnable "both" + +MOUNTDIR="$TEST_BASE_DIR/io_mount" +FILEPATH="$MOUNTDIR/io_file" +VDEV="$TEST_BASE_DIR/vdevfile.$$" +VDEV1="$TEST_BASE_DIR/vdevfile1.$$" +POOL="io_pool" + +function cleanup +{ + log_must zed_stop + + log_must zinject -c all + if poolexists $POOL ; then + destroy_pool $POOL + fi + log_must rm -fd $VDEV $VDEV1 $MOUNTDIR + log_must set_tunable32 PREFETCH_DISABLE $zfsprefetch +} +log_onexit cleanup + +log_assert "Test ZED io_n and io_t configurability" + +zfsprefetch=$(get_tunable PREFETCH_DISABLE) +log_must set_tunable32 PREFETCH_DISABLE 1 + +function setup_pool +{ + log_must zpool create -f -m $MOUNTDIR $POOL mirror $VDEV $VDEV1 + log_must zpool events -c + log_must truncate -s 0 $ZED_DEBUG_LOG + log_must zfs set compression=off $POOL + log_must zfs set primarycache=none $POOL + log_must zfs set recordsize=512 $POOL +} + +function do_clean +{ + log_must zinject -c all + log_must zpool destroy $POOL +} + +# Test default ZED settings: +# io_n=10 (events) +# io_t=600 (seconds) +# fire 10 events over 2.5 seconds, should degrade.
+function default_degrade +{ + setup_pool + + log_must dd if=/dev/urandom of=$FILEPATH bs=1M count=64 + log_must zinject -a -d $VDEV -e io -T read -f 100 $POOL + + blk=0 + for _ in {1..10}; do + dd if=$FILEPATH of=/dev/null bs=1 count=1 skip=$blk 2>/dev/null + blk=$((blk+512)) + sleep 0.25 + done + + log_must wait_vdev_state $POOL $VDEV "FAULTED" 60 + do_clean +} + +# set io_n=1 +# fire 1 event, should degrade +function io_n_degrade +{ + setup_pool + + log_must zpool set io_n=1 $POOL $VDEV + log_must dd if=/dev/urandom of=$FILEPATH bs=1M count=64 + log_must zinject -a -d $VDEV -e io -T read -f 100 $POOL + + dd if=$FILEPATH of=/dev/null bs=1 count=1 2>/dev/null + + log_must wait_vdev_state $POOL $VDEV "FAULTED" 60 + do_clean +} + +# set io_t=1 +# fire 10 events over 2.5 seconds, should not degrade +function io_t_nodegrade +{ + setup_pool + + log_must zpool set io_t=1 $POOL $VDEV + log_must dd if=/dev/urandom of=$FILEPATH bs=1M count=64 + log_must zinject -a -d $VDEV -e io -T read -f 100 $POOL + + blk=0 + for _ in {1..10}; do + dd if=$FILEPATH of=/dev/null bs=1 count=1 skip=$blk 2>/dev/null + blk=$((blk+512)) + sleep 0.25 + done + + log_must file_wait $ZED_DEBUG_LOG 30 + log_must wait_vdev_state $POOL $VDEV "ONLINE" 1 + + do_clean +} + +log_must truncate -s $MINVDEVSIZE $VDEV +log_must truncate -s $MINVDEVSIZE $VDEV1 +log_must mkdir -p $MOUNTDIR + +log_must zed_start +default_degrade +io_n_degrade +io_t_nodegrade + +log_pass "Test ZED io_n and io_t configurability" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/l2arc/l2arc_mfuonly_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/l2arc/l2arc_mfuonly_pos.ksh index f2bada0ebbec..89ab940334ee 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/l2arc/l2arc_mfuonly_pos.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/l2arc/l2arc_mfuonly_pos.ksh @@ -80,7 +80,7 @@ log_must fio $FIO_SCRIPTS/mkfiles.fio log_must fio $FIO_SCRIPTS/random_reads.fio log_must zpool export 
$TESTPOOL -log_must zpool import -d $VDIR $TESTPOOL +log_must zpool import -N -d $VDIR $TESTPOOL # Regardless of l2arc_noprefetch, some MFU buffers might be evicted # from ARC, accessed later on as prefetches and transition to MRU as diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/renameat2/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/renameat2/Makefile.am deleted file mode 100644 index bd8d6c9d68bf..000000000000 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/renameat2/Makefile.am +++ /dev/null @@ -1,7 +0,0 @@ -pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/renameat2 -dist_pkgdata_SCRIPTS = \ - setup.ksh \ - cleanup.ksh \ - renameat2_noreplace.ksh \ - renameat2_exchange.ksh \ - renameat2_whiteout.ksh diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_freeobjects.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_freeobjects.ksh new file mode 100755 index 000000000000..92451bd1ab6f --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_freeobjects.ksh @@ -0,0 +1,87 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2017 by Lawrence Livermore National Security, LLC. +# Copyright (c) 2023 by Findity AB +# + +. $STF_SUITE/tests/functional/rsend/rsend.kshlib + +# +# Description: +# Verify that receiving a raw encrypted stream, with a FREEOBJECTS +# removing all existing objects in a block followed by an OBJECT write +# to the same block, does not result in a panic. +# +# Strategy: +# 1. 
Create a new encrypted filesystem +# 2. Create file f1 as the first object in some block (here object 128) +# 3. Take snapshot A +# 4. Create file f2 as the second object in the same block (here object 129) +# 5. Delete f1 +# 6. Take snapshot B +# 7. Receive a full raw encrypted send of A +# 8. Receive an incremental raw send of B +# +verify_runnable "both" + +function create_object_with_num +{ + file=$1 + num=$2 + + tries=100 + for ((i=0; i<$tries; i++)); do + touch $file + onum=$(ls -li $file | awk '{print $1}') + + if [[ $onum -ne $num ]] ; then + rm -f $file + else + break + fi + done + if [[ $i -eq $tries ]]; then + log_fail "Failed to create object with number $num" + fi +} + +log_assert "FREEOBJECTS followed by OBJECT in encrypted stream does not crash" + +sendds=sendencfods +recvds=recvencfods +keyfile=/$POOL/keyencfods +f1=/$POOL/$sendds/f1 +f2=/$POOL/$sendds/f2 + +log_must eval "echo 'password' > $keyfile" + +# +# xattr=sa and dnodesize=legacy for sequential object numbers, see +# note in send_freeobjects.ksh. 
+# +log_must zfs create -o xattr=sa -o dnodesize=legacy -o encryption=on \ + -o keyformat=passphrase -o keylocation=file://$keyfile $POOL/$sendds + +create_object_with_num $f1 128 +log_must zfs snap $POOL/$sendds@A +create_object_with_num $f2 129 +log_must rm $f1 +log_must zfs snap $POOL/$sendds@B + +log_must eval "zfs send -w $POOL/$sendds@A | zfs recv $POOL/$recvds" +log_must eval "zfs send -w -i $POOL/$sendds@A $POOL/$sendds@B |" \ + "zfs recv $POOL/$recvds" + +log_pass "FREEOBJECTS followed by OBJECT in encrypted stream did not crash" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_incremental.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_incremental.ksh new file mode 100755 index 000000000000..491ad55db411 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_incremental.ksh @@ -0,0 +1,93 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/rsend/rsend.kshlib + +# +# Description: +# Incrementally receiving a snapshot to an encrypted filesystem succeeds. 
+# +# Strategy: +# 1) Create a pool and an encrypted fs +# 2) Create some files and snapshots +# 3) Send the first snapshot to a second encrypted as well as an +# unencrypted fs. +# 4) Incrementally send the second snapshot to the unencrypted fs. +# 5) Rollback the second encrypted fs to the first snapshot. +# 6) Incrementally send the second snapshot from the unencrypted to the +# second encrypted fs. +# 7) Incrementally send the third snapshot from the first encrypted to the +# unencrypted fs. +# 8) Incrementally send the third snapshot from the unencrypted to the second +# encrypted fs. +# + +verify_runnable "both" + +log_assert "Incrementally receiving a snapshot to an encrypted filesystem succeeds" + +function cleanup +{ + destroy_pool pool_lb + log_must rm -f $TESTDIR/vdev_a +} + +log_onexit cleanup + +typeset passphrase="password" +typeset passphrase2="password2" + +typeset file="/pool_lb/encryptme/$TESTFILE0" +typeset file1="/pool_lb/encryptme/$TESTFILE1" +typeset file2="/pool_lb/encryptme/$TESTFILE2" + +# Create pool +truncate -s $MINVDEVSIZE $TESTDIR/vdev_a +log_must zpool create -f pool_lb $TESTDIR/vdev_a +log_must eval "echo $passphrase > /pool_lb/pwd" +log_must eval "echo $passphrase2 > /pool_lb/pwd2" + +log_must zfs create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///pool_lb/pwd pool_lb/encryptme +log_must dd if=/dev/urandom of=$file bs=1024 count=1024 +log_must zfs snapshot pool_lb/encryptme@snap1 +log_must dd if=/dev/urandom of=$file1 bs=1024 count=1024 +log_must zfs snapshot pool_lb/encryptme@snap2 +log_must dd if=/dev/urandom of=$file2 bs=1024 count=1024 +log_must zfs snapshot pool_lb/encryptme@snap3 +log_must eval "zfs send -Lc pool_lb/encryptme@snap1 | zfs recv \ + -o encryption=on -o keyformat=passphrase -o keylocation=file:///pool_lb/pwd2 \ + pool_lb/encrypttwo" +log_must eval "zfs send -Lc pool_lb/encryptme@snap1 | zfs recv \ + pool_lb/unencryptme" +log_must eval "zfs send -Lc -i pool_lb/encryptme@{snap1,snap2} | zfs 
recv \ + pool_lb/unencryptme" +log_must zfs rollback pool_lb/encrypttwo@snap1 +log_must eval "zfs send -Lc -i pool_lb/unencryptme@{snap1,snap2} | zfs recv \ + pool_lb/encrypttwo" +log_must eval "zfs send -Lc -i pool_lb/encryptme@{snap2,snap3} | zfs recv \ + pool_lb/unencryptme" +log_must eval "zfs send -Lc -i pool_lb/unencryptme@{snap2,snap3} | zfs recv \ + -F pool_lb/encrypttwo" + +log_pass "Incrementally receiving a snapshot to an encrypted filesystem succeeds" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_raw_large_blocks.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_raw_large_blocks.ksh index 39e93a7df3c2..48a76fca63e1 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_raw_large_blocks.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_raw_large_blocks.ksh @@ -49,7 +49,7 @@ raw_backup=$TEST_BASE_DIR/raw_backup function cleanup { log_must rm -f $backup $raw_backup $ibackup $unc_backup - destroy_pool pool_lb/fs + destroy_pool pool_lb log_must rm -f $TESTDIR/vdev_a } diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh index 924b56935def..c97772585737 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh @@ -95,6 +95,7 @@ for type in "" "mirror" "raidz2" "draid"; do # Fill the pool, verify the vdevs are no longer sparse. file_write -o create -f /$TESTPOOL/file -b 1048576 -c $fill_mb -d R + sync_pool $TESTPOOL verify_vdevs "-ge" "$VDEV_MAX_MB" $VDEVS # Remove the file, wait for trim, verify the vdevs are now sparse. 
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/trim/trim_config.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/trim/trim_config.ksh index 9a6e19e1c042..6a187a05b579 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/trim/trim_config.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/trim/trim_config.ksh @@ -94,6 +94,7 @@ for type in "" "mirror" "raidz2" "draid"; do # Fill the pool, verify the vdevs are no longer sparse. file_write -o create -f /$TESTPOOL/file -b 1048576 -c $fill_mb -d R + sync_pool $TESTPOOL verify_vdevs "-ge" "$VDEV_MAX_MB" $VDEVS # Remove the file, issue trim, verify the vdevs are now sparse. diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh index b95eca73b8de..2c4ef28ab826 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh @@ -83,6 +83,7 @@ function do_test { # Write to zvol log_must dd if=$datafile1 of=$zvolpath conv=fsync + sync_pool # Record how much space we've used (should be 5MB, with 128k # of tolerance). 
diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h index 2b9a6c97cd7f..5c30abf047f4 100644 --- a/sys/modules/zfs/zfs_config.h +++ b/sys/modules/zfs/zfs_config.h @@ -315,7 +315,7 @@ /* #undef HAVE_FOLLOW_LINK_NAMEIDATA */ /* Define if compiler supports -Wformat-overflow */ -/* #undef HAVE_FORMAT_OVERFLOW */ +/* #undef HAVE_FORMAT_OVERFLOW */ /* fops->fsync() with range */ /* #undef HAVE_FSYNC_RANGE */ @@ -353,6 +353,9 @@ /* iops->get_acl() takes rcu */ /* #undef HAVE_GET_ACL_RCU */ +/* has iops->get_inode_acl() */ +/* #undef HAVE_GET_INODE_ACL */ + /* iops->get_link() cookie */ /* #undef HAVE_GET_LINK_COOKIE */ @@ -665,6 +668,9 @@ /* iops->set_acl() takes 4 args */ /* #undef HAVE_SET_ACL_USERNS */ +/* iops->set_acl() takes 4 args, arg2 is struct dentry * */ +/* #undef HAVE_SET_ACL_USERNS_DENTRY_ARG2 */ + /* set_cached_acl() is usable */ /* #undef HAVE_SET_CACHED_ACL_USABLE */ @@ -754,6 +760,9 @@ /* i_op->tmpfile() exists */ /* #undef HAVE_TMPFILE */ +/* i_op->tmpfile() uses old dentry signature */ +/* #undef HAVE_TMPFILE_DENTRY */ + /* i_op->tmpfile() has userns */ /* #undef HAVE_TMPFILE_USERNS */ @@ -976,7 +985,7 @@ /* #undef ZFS_IS_GPL_COMPATIBLE */ /* Define the project alias string. */ -#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_g59493b63c" +#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_g9cd71c860" /* Define the project author. */ #define ZFS_META_AUTHOR "OpenZFS" @@ -985,7 +994,7 @@ /* #undef ZFS_META_DATA */ /* Define the maximum compatible kernel version. */ -#define ZFS_META_KVER_MAX "6.0" +#define ZFS_META_KVER_MAX "6.1" /* Define the minimum compatible kernel version. */ #define ZFS_META_KVER_MIN "3.10" @@ -1006,7 +1015,7 @@ #define ZFS_META_NAME "zfs" /* Define the project release. */ -#define ZFS_META_RELEASE "FreeBSD_g59493b63c" +#define ZFS_META_RELEASE "FreeBSD_g9cd71c860" /* Define the project version. 
*/ #define ZFS_META_VERSION "2.1.99" diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h index 0b901ad44b6a..334a458f9647 100644 --- a/sys/modules/zfs/zfs_gitrev.h +++ b/sys/modules/zfs/zfs_gitrev.h @@ -1 +1 @@ -#define ZFS_META_GITREV "zfs-2.1.99-1610-g59493b63c" +#define ZFS_META_GITREV "zfs-2.1.99-1706-g9cd71c860"