Minor changes from Berkeley DB 1.86 and further improvements from OpenBSD.

This does not include the new hash routines since they will cause problems
when reading old hash files.

Since mpool(3) has been changed, provide a compatibility shim for older
binaries.

Obtained from:	OpenBSD
This commit is contained in:
Xin LI 2009-03-28 07:31:02 +00:00
parent 73590c342a
commit 9fc74a871c
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=190498
11 changed files with 237 additions and 130 deletions

View file

@ -30,7 +30,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)mpool.h 8.2 (Berkeley) 7/14/94
* @(#)mpool.h 8.4 (Berkeley) 11/2/95
* $FreeBSD$
*/
@ -47,7 +47,7 @@
* pool is handed an opaque MPOOL cookie which stores all of this information.
*/
#define HASHSIZE 128
#define HASHKEY(pgno) ((pgno - 1) % HASHSIZE)
#define HASHKEY(pgno) ((pgno - 1 + HASHSIZE) % HASHSIZE)
/* The BKT structures are the elements of the queues. */
typedef struct _bkt {
@ -58,6 +58,7 @@ typedef struct _bkt {
#define MPOOL_DIRTY 0x01 /* page needs to be written */
#define MPOOL_PINNED 0x02 /* page is pinned into memory */
#define MPOOL_INUSE 0x04 /* page address is valid */
u_int8_t flags; /* flags */
} BKT;
@ -68,7 +69,7 @@ typedef struct MPOOL {
pgno_t curcache; /* current number of cached pages */
pgno_t maxcache; /* max number of cached pages */
pgno_t npages; /* number of pages in the file */
u_long pagesize; /* file page size */
unsigned long pagesize; /* file page size */
int fd; /* file descriptor */
/* page in conversion routine */
void (*pgin)(void *, pgno_t, void *);
@ -76,25 +77,32 @@ typedef struct MPOOL {
void (*pgout)(void *, pgno_t, void *);
void *pgcookie; /* cookie for page in/out routines */
#ifdef STATISTICS
u_long cachehit;
u_long cachemiss;
u_long pagealloc;
u_long pageflush;
u_long pageget;
u_long pagenew;
u_long pageput;
u_long pageread;
u_long pagewrite;
unsigned long cachehit;
unsigned long cachemiss;
unsigned long pagealloc;
unsigned long pageflush;
unsigned long pageget;
unsigned long pagenew;
unsigned long pageput;
unsigned long pageread;
unsigned long pagewrite;
#endif
} MPOOL;
#define MPOOL_IGNOREPIN 0x01 /* Ignore if the page is pinned. */
#define MPOOL_PAGE_REQUEST 0x01 /* Allocate a new page with a
specific page number. */
#define MPOOL_PAGE_NEXT 0x02 /* Allocate a new page with the next
page number. */
__BEGIN_DECLS
MPOOL *mpool_open(void *, int, pgno_t, pgno_t);
void mpool_filter(MPOOL *, void (*)(void *, pgno_t, void *),
void (*)(void *, pgno_t, void *), void *);
void *mpool_new(MPOOL *, pgno_t *);
void *mpool_get(MPOOL *, pgno_t, u_int);
int mpool_put(MPOOL *, void *, u_int);
void *mpool_new(MPOOL *, pgno_t *, unsigned int);
void *mpool_get(MPOOL *, pgno_t, unsigned int);
int mpool_delete(MPOOL *, void *);
int mpool_put(MPOOL *, void *, unsigned int);
int mpool_sync(MPOOL *);
int mpool_close(MPOOL *);
#ifdef STATISTICS

View file

@ -16,7 +16,6 @@ FBSD_1.0 {
dbm_dirfno;
mpool_open;
mpool_filter;
mpool_new;
mpool_get;
mpool_put;
mpool_close;
@ -24,6 +23,11 @@ FBSD_1.0 {
mpool_stat;
};
FBSD_1.1 {
mpool_new;
mpool_delete;
};
FBSDprivate_1.0 {
__bt_open;
__dbpanic;

View file

@ -83,10 +83,9 @@ __bt_dump(DB *dbp)
}
#undef X
for (i = P_ROOT; (h = mpool_get(t->bt_mp, i, 0)) != NULL; ++i) {
for (i = P_ROOT;
(h = mpool_get(t->bt_mp, i, MPOOL_IGNOREPIN)) != NULL; ++i)
__bt_dpage(h);
(void)mpool_put(t->bt_mp, h, 0);
}
}
/*
@ -135,10 +134,8 @@ __bt_dnpage(DB *dbp, pgno_t pgno)
PAGE *h;
t = dbp->internal;
if ((h = mpool_get(t->bt_mp, pgno, 0)) != NULL) {
if ((h = mpool_get(t->bt_mp, pgno, MPOOL_IGNOREPIN)) != NULL)
__bt_dpage(h);
(void)mpool_put(t->bt_mp, h, 0);
}
}
/*
@ -257,7 +254,8 @@ __bt_stat(DB *dbp)
t = dbp->internal;
pcont = pinternal = pleaf = 0;
nkeys = ifree = lfree = 0;
for (i = P_ROOT; (h = mpool_get(t->bt_mp, i, 0)) != NULL; ++i) {
for (i = P_ROOT;
(h = mpool_get(t->bt_mp, i, MPOOL_IGNOREPIN)) != NULL; ++i)
switch (h->flags & P_TYPE) {
case P_BINTERNAL:
case P_RINTERNAL:
@ -274,22 +272,18 @@ __bt_stat(DB *dbp)
++pcont;
break;
}
(void)mpool_put(t->bt_mp, h, 0);
}
/* Count the levels of the tree. */
for (i = P_ROOT, levels = 0 ;; ++levels) {
h = mpool_get(t->bt_mp, i, 0);
h = mpool_get(t->bt_mp, i, MPOOL_IGNOREPIN);
if (h->flags & (P_BLEAF|P_RLEAF)) {
if (levels == 0)
levels = 1;
(void)mpool_put(t->bt_mp, h, 0);
break;
}
i = F_ISSET(t, R_RECNO) ?
GETRINTERNAL(h, 0)->pgno :
GETBINTERNAL(h, 0)->pgno;
(void)mpool_put(t->bt_mp, h, 0);
}
(void)fprintf(stderr, "%d level%s with %lu keys",

View file

@ -352,18 +352,25 @@ nroot(BTREE *t)
PAGE *meta, *root;
pgno_t npg;
if ((meta = mpool_get(t->bt_mp, 0, 0)) != NULL) {
mpool_put(t->bt_mp, meta, 0);
return (RET_SUCCESS);
if ((root = mpool_get(t->bt_mp, 1, 0)) != NULL) {
if (root->lower == 0 &&
root->pgno == 0 &&
root->linp[0] == 0) {
mpool_delete(t->bt_mp, root);
errno = EINVAL;
} else {
mpool_put(t->bt_mp, root, 0);
return (RET_SUCCESS);
}
}
if (errno != EINVAL) /* It's OK to not exist. */
return (RET_ERROR);
errno = 0;
if ((meta = mpool_new(t->bt_mp, &npg)) == NULL)
if ((meta = mpool_new(t->bt_mp, &npg, MPOOL_PAGE_NEXT)) == NULL)
return (RET_ERROR);
if ((root = mpool_new(t->bt_mp, &npg)) == NULL)
if ((root = mpool_new(t->bt_mp, &npg, MPOOL_PAGE_NEXT)) == NULL)
return (RET_ERROR);
if (npg != P_ROOT)

View file

@ -90,5 +90,5 @@ __bt_new(BTREE *t, pgno_t *npg)
F_SET(t, B_METADIRTY);
return (h);
}
return (mpool_new(t->bt_mp, npg));
return (mpool_new(t->bt_mp, npg, MPOOL_PAGE_NEXT));
}

View file

@ -1,4 +1,5 @@
# @(#)README 8.1 (Berkeley) 6/4/93
# $FreeBSD$
This package implements a superset of the hsearch and dbm/ndbm libraries.
@ -44,10 +45,6 @@ Test Programs:
NOTES:
The file search.h is provided for using the hsearch compatible interface
on BSD systems. On System V derived systems, search.h should appear in
/usr/include.
The man page ../man/db.3 explains the interface to the hashing system.
The file hash.ps is a postscript copy of a paper explaining
the history, implementation, and performance of the hash package.

View file

@ -43,60 +43,57 @@ __FBSDID("$FreeBSD$");
#include "page.h"
#include "extern.h"
#ifdef notdef
static u_int32_t hash1(const void *, size_t) __unused;
static u_int32_t hash2(const void *, size_t) __unused;
static u_int32_t hash3(const void *, size_t) __unused;
#endif
static u_int32_t hash4(const void *, size_t);
/* Global default hash function */
/* Default hash function. */
u_int32_t (*__default_hash)(const void *, size_t) = hash4;
#ifdef notdef
/*
* HASH FUNCTIONS
*
* Assume that we've already split the bucket to which this key hashes,
* calculate that bucket, and check that in fact we did already split it.
*
* This came from ejb's hsearch.
* EJB's original hsearch hash.
*/
#define PRIME1 37
#define PRIME2 1048583
static u_int32_t
hash1(keyarg, len)
const void *keyarg;
size_t len;
u_int32_t
hash1(const void *key, size_t len)
{
const u_char *key;
u_int32_t h;
u_int8_t *k;
h = 0;
k = (u_int8_t *)key;
/* Convert string to integer */
for (key = keyarg, h = 0; len--;)
h = h * PRIME1 ^ (*key++ - ' ');
while (len--)
h = h * PRIME1 ^ (*k++ - ' ');
h %= PRIME2;
return (h);
}
/*
* Phong's linear congruential hash
* Phong Vo's linear congruential hash
*/
#define dcharhash(h, c) ((h) = 0x63c63cd9*(h) + 0x9c39c33d + (c))
static u_int32_t
hash2(keyarg, len)
const void *keyarg;
size_t len;
u_int32_t
hash2(const void *key, size_t len)
{
const u_char *e, *key;
u_int32_t h;
u_char c;
u_int8_t *e, c, *k;
key = keyarg;
e = key + len;
for (h = 0; key != e;) {
c = *key++;
if (!c && key > e)
k = (u_int8_t *)key;
e = k + len;
for (h = 0; k != e;) {
c = *k++;
if (!c && k > e)
break;
dcharhash(h, c);
}
@ -110,101 +107,84 @@ hash2(keyarg, len)
* all 8 bytes. Essentially, this saves us 7 cmp & branch instructions. If
* this routine is heavily used enough, it's worth the ugly coding.
*
* OZ's original sdbm hash
* Ozan Yigit's original sdbm hash.
*/
static u_int32_t
hash3(keyarg, len)
const void *keyarg;
size_t len;
u_int32_t
hash3(const void *key, size_t len)
{
const u_char *key;
size_t loop;
u_int32_t h;
u_int32_t n, loop;
u_int8_t *k;
#define HASHC h = *key++ + 65599 * h
#define HASHC n = *k++ + 65599 * n
h = 0;
key = keyarg;
n = 0;
k = (u_int8_t *)key;
if (len > 0) {
loop = (len + 8 - 1) >> 3;
switch (len & (8 - 1)) {
case 0:
do {
do { /* All fall throughs */
HASHC;
/* FALLTHROUGH */
case 7:
HASHC;
/* FALLTHROUGH */
case 6:
HASHC;
/* FALLTHROUGH */
case 5:
HASHC;
/* FALLTHROUGH */
case 4:
HASHC;
/* FALLTHROUGH */
case 3:
HASHC;
/* FALLTHROUGH */
case 2:
HASHC;
/* FALLTHROUGH */
case 1:
HASHC;
} while (--loop);
}
}
return (h);
return (n);
}
#endif /* notdef */
/* Hash function from Chris Torek. */
static u_int32_t
hash4(keyarg, len)
const void *keyarg;
size_t len;
/* Chris Torek's hash function. */
u_int32_t
hash4(const void *key, size_t len)
{
const u_char *key;
size_t loop;
u_int32_t h;
u_int32_t h, loop;
const u_int8_t *k;
#define HASH4a h = (h << 5) - h + *key++;
#define HASH4b h = (h << 5) + h + *key++;
#define HASH4a h = (h << 5) - h + *k++;
#define HASH4b h = (h << 5) + h + *k++;
#define HASH4 HASH4b
h = 0;
key = keyarg;
k = key;
if (len > 0) {
loop = (len + 8 - 1) >> 3;
switch (len & (8 - 1)) {
case 0:
do {
do { /* All fall throughs */
HASH4;
/* FALLTHROUGH */
case 7:
HASH4;
/* FALLTHROUGH */
case 6:
HASH4;
/* FALLTHROUGH */
case 5:
HASH4;
/* FALLTHROUGH */
case 4:
HASH4;
/* FALLTHROUGH */
case 3:
HASH4;
/* FALLTHROUGH */
case 2:
HASH4;
/* FALLTHROUGH */
case 1:
HASH4;
} while (--loop);
}
}
return (h);
}

View file

@ -28,7 +28,7 @@
.\" @(#)mpool.3 8.1 (Berkeley) 6/4/93
.\" $FreeBSD$
.\"
.Dd June 4, 1993
.Dd February 25, 1999
.Dt MPOOL 3
.Os
.Sh NAME
@ -47,7 +47,9 @@
.Fa "void *pgcookie"
.Fc
.Ft void *
.Fn mpool_new "MPOOL *mp" "pgno_t *pgnoaddr"
.Fn mpool_new "MPOOL *mp" "pgno_t *pgnoaddr" "u_int flags"
.Ft int
.Fn mpool_delete "MPOOL *mp" "void *page"
.Ft void *
.Fn mpool_get "MPOOL *mp" "pgno_t pgno" "u_int flags"
.Ft int
@ -99,11 +101,11 @@ Both functions are called with the
.Fa pgcookie
pointer, the page number and a pointer to the page being read or written.
.Pp
The
The function
.Fn mpool_new
function takes an
.Ft MPOOL
pointer and an address as arguments.
takes an
.Dv MPOOL
pointer, an address, and a set of flags as arguments.
If a new page can be allocated, a pointer to the page is returned and
the page number is stored into the
.Fa pgnoaddr
@ -113,6 +115,24 @@ Otherwise,
is returned and
.Va errno
is set.
The flags value is formed by
.Tn OR Ns 'ing
the following values:
.Bl -tag -width Ds
.It Dv MPOOL_PAGE_REQUEST
Allocate a new page with a specific page number.
.It Dv MPOOL_PAGE_NEXT
Allocate a new page with the next page number.
.El
.Pp
The function
.Fn mpool_delete
deletes the specified page from a pool and frees the page.
It takes an
.Dv MPOOL
pointer and a page as arguments.
The page must have been generated by
.Fn mpool_new .
.Pp
The
.Fn mpool_get

View file

@ -3,4 +3,4 @@
.PATH: ${.CURDIR}/db/mpool
SRCS+= mpool.c
SRCS+= mpool.c mpool-compat.c

View file

@ -0,0 +1,43 @@
/*-
* Copyright (c) 2009 Xin LI <delphij@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <db.h>
#include <mpool.h>
/*
 * Compatibility shim for callers of the FBSD_1.0 mpool_new(), which took
 * only the pool cookie and the page-number address.
 */
void *__mpool_new__44bsd(MPOOL *, pgno_t *);

/*
 * __mpool_new__44bsd --
 *	Old two-argument form of mpool_new().
 *
 * Parameters:
 *	mp:		memory pool cookie, passed through unchanged
 *	pgnoaddr:	page-number address, passed through unchanged
 *
 * Returns:
 *	Whatever mpool_new() returns when called with MPOOL_PAGE_NEXT.
 */
void *
__mpool_new__44bsd(MPOOL *mp, pgno_t *pgnoaddr)
{

	return (mpool_new(mp, pgnoaddr, MPOOL_PAGE_NEXT));
}

/*
 * Export the shim as mpool_new@FBSD_1.0.  The implementation name given
 * here must be spelled exactly like the function defined above (note the
 * double underscore before "44bsd"); otherwise the versioned alias refers
 * to a symbol that is never defined.
 */
__sym_compat(mpool_new, __mpool_new__44bsd, FBSD_1.0);

View file

@ -110,9 +110,7 @@ mpool_filter(MPOOL *mp, void (*pgin) (void *, pgno_t, void *),
* Get a new page of memory.
*/
void *
mpool_new(mp, pgnoaddr)
MPOOL *mp;
pgno_t *pgnoaddr;
mpool_new(MPOOL *mp, pgno_t *pgnoaddr, u_int flags)
{
struct _hqh *head;
BKT *bp;
@ -131,8 +129,13 @@ mpool_new(mp, pgnoaddr)
*/
if ((bp = mpool_bkt(mp)) == NULL)
return (NULL);
*pgnoaddr = bp->pgno = mp->npages++;
bp->flags = MPOOL_PINNED;
if (flags == MPOOL_PAGE_REQUEST) {
mp->npages++;
bp->pgno = *pgnoaddr;
} else
bp->pgno = *pgnoaddr = mp->npages++;
bp->flags = MPOOL_PINNED | MPOOL_INUSE;
head = &mp->hqh[HASHKEY(bp->pgno)];
TAILQ_INSERT_HEAD(head, bp, hq);
@ -140,6 +143,32 @@ mpool_new(mp, pgnoaddr)
return (bp->page);
}
/*
 * mpool_delete
 *	Drop a page from the pool and release its memory.
 *
 * Parameters:
 *	mp:	memory pool cookie
 *	page:	page pointer; its BKT header is located sizeof(BKT) bytes
 *		before this address
 *
 * Returns:
 *	RET_SUCCESS (there is no failure return on this path; a DEBUG build
 *	aborts if the page is not pinned).
 */
int
mpool_delete(MPOOL *mp, void *page)
{
	BKT *bkt;
	struct _hqh *chain;

	/* Step back from the page data to the bucket header in front of it. */
	bkt = (BKT *)((char *)page - sizeof(BKT));

#ifdef DEBUG
	if ((bkt->flags & MPOOL_PINNED) == 0) {
		(void)fprintf(stderr,
		    "mpool_delete: page %d not pinned\n", bkt->pgno);
		abort();
	}
#endif

	/* Unlink the bucket from its hash chain, then from the lru queue. */
	chain = &mp->hqh[HASHKEY(bkt->pgno)];
	TAILQ_REMOVE(chain, bkt, hq);
	TAILQ_REMOVE(&mp->lqh, bkt, q);

	/* The bucket is no longer cached; account for it and free it. */
	mp->curcache--;
	free(bkt);

	return (RET_SUCCESS);
}
/*
* mpool_get
* Get a page.
@ -154,12 +183,6 @@ mpool_get(MPOOL *mp, pgno_t pgno,
off_t off;
int nr;
/* Check for attempt to retrieve a non-existent page. */
if (pgno >= mp->npages) {
errno = EINVAL;
return (NULL);
}
#ifdef STATISTICS
++mp->pageget;
#endif
@ -167,7 +190,7 @@ mpool_get(MPOOL *mp, pgno_t pgno,
/* Check for a page that is cached. */
if ((bp = mpool_look(mp, pgno)) != NULL) {
#ifdef DEBUG
if (bp->flags & MPOOL_PINNED) {
if (!(flags & MPOOL_IGNOREPIN) && bp->flags & MPOOL_PINNED) {
(void)fprintf(stderr,
"mpool_get: page %d already pinned\n", bp->pgno);
abort();
@ -193,20 +216,38 @@ mpool_get(MPOOL *mp, pgno_t pgno,
return (NULL);
/* Read in the contents. */
off = mp->pagesize * pgno;
if ((nr = pread(mp->fd, bp->page, mp->pagesize, off)) != mp->pagesize) {
switch (nr) {
case -1:
/* errno is set for us by pread(). */
free(bp);
mp->curcache--;
return (NULL);
case 0:
/*
* A zero-length read means you need to create a
* new page.
*/
memset(bp->page, 0, mp->pagesize);
break;
default:
/* A partial read is definitely bad. */
free(bp);
mp->curcache--;
errno = EINVAL;
return (NULL);
}
}
#ifdef STATISTICS
++mp->pageread;
#endif
off = mp->pagesize * pgno;
nr = pread(mp->fd, bp->page, mp->pagesize, off);
if (nr != mp->pagesize) {
if (nr >= 0)
errno = EFTYPE;
return (NULL);
}
/* Set the page number, pin the page. */
bp->pgno = pgno;
bp->flags = MPOOL_PINNED;
if (!(flags & MPOOL_IGNOREPIN))
bp->flags = MPOOL_PINNED;
bp->flags |= MPOOL_INUSE;
/*
* Add the page to the head of the hash chain and the tail
@ -245,7 +286,8 @@ mpool_put(MPOOL *mp, void *page, u_int flags)
}
#endif
bp->flags &= ~MPOOL_PINNED;
bp->flags |= flags & MPOOL_DIRTY;
if (flags & MPOOL_DIRTY)
bp->flags |= flags & MPOOL_DIRTY;
return (RET_SUCCESS);
}
@ -329,6 +371,7 @@ mpool_bkt(MPOOL *mp)
bp->page = spage;
}
#endif
bp->flags = 0;
return (bp);
}
@ -338,6 +381,7 @@ new: if ((bp = (BKT *)calloc(1, sizeof(BKT) + mp->pagesize)) == NULL)
++mp->pagealloc;
#endif
bp->page = (char *)bp + sizeof(BKT);
bp->flags = 0;
++mp->curcache;
return (bp);
}
@ -363,6 +407,15 @@ mpool_write(MPOOL *mp, BKT *bp)
if (pwrite(mp->fd, bp->page, mp->pagesize, off) != mp->pagesize)
return (RET_ERROR);
/*
* Re-run through the input filter since this page may soon be
* accessed via the cache, and whatever the user's output filter
* did may screw things up if we don't let the input filter
* restore the in-core copy.
*/
if (mp->pgin)
(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
bp->flags &= ~MPOOL_DIRTY;
return (RET_SUCCESS);
}
@ -379,7 +432,8 @@ mpool_look(MPOOL *mp, pgno_t pgno)
head = &mp->hqh[HASHKEY(pgno)];
TAILQ_FOREACH(bp, head, hq)
if (bp->pgno == pgno) {
if ((bp->pgno == pgno) &&
((bp->flags & MPOOL_INUSE) == MPOOL_INUSE)) {
#ifdef STATISTICS
++mp->cachehit;
#endif