Improve performance in a few key areas:

o Split the compression across several worker threads. By default, "several"
   matches the number of CPUs, capped at 24 for sanity when running on very big
   hardware. Provide an option to set that number manually;

 o Fix a bug inherited from mkulzma (R.I.P.), which degraded the already slow
   LZMA compression even further by calling the function that releases the
   compression state after processing each block.

   That call is neither documented as required nor actually required by the
   LZMA library. It caused a spree of system calls to release memory and then
   map it again for every block. LZMA compression is more than 2x faster after
   this change alone;

 o Record the time it takes to do the compression and report the throughput
   achieved.

 o Add a simple first-level 256-entry hash table to the de-dup code, so that
   it does not become a bottleneck on big files.
This commit is contained in:
Maxim Sobolev 2016-04-23 07:23:43 +00:00
parent 416ee66e25
commit 4fc55e3e46
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=298504
21 changed files with 1038 additions and 193 deletions

View file

@ -2,8 +2,11 @@
PROG= mkuzip
MAN= mkuzip.8
SRCS= mkuzip.c mkuz_blockcache.c mkuz_lzma.c mkuz_zlib.c
SRCS= mkuzip.c mkuz_blockcache.c mkuz_lzma.c mkuz_zlib.c mkuz_conveyor.c \
mkuz_blk.c mkuz_fqueue.c mkuz_time.c
LIBADD= z md lzma
#CFLAGS+= -DMKUZ_DEBUG
LIBADD= z md lzma pthread
.include <bsd.prog.mk>

45
usr.bin/mkuzip/mkuz_blk.c Normal file
View file

@ -0,0 +1,45 @@
/*
* Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <stdint.h>
#include <stdlib.h>
#include "mkuzip.h"
#include "mkuz_blk.h"
/*
 * Allocate a block descriptor with room for `blen' payload bytes in its
 * trailing data[] array.
 *
 * The allocation comes from mkuz_safe_zmalloc(); its result is used without
 * a NULL check, so it is presumably expected never to fail and to return
 * zero-filled memory (TODO confirm against its definition).  The allocated
 * payload capacity is recorded in `alen' and `br_offset' starts out as
 * OFFSET_UNDEF.
 *
 * Returns the new block.
 */
struct mkuz_blk *
mkuz_blk_ctor(size_t blen)
{
	struct mkuz_blk *bp;
	size_t total;

	total = sizeof(*bp) + blen;
	bp = mkuz_safe_zmalloc(total);
	bp->br_offset = OFFSET_UNDEF;
	bp->alen = blen;
	return (bp);
}

48
usr.bin/mkuzip/mkuz_blk.h Normal file
View file

@ -0,0 +1,48 @@
/*
* Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
/*
 * Value stored in mkuz_blk.br_offset by mkuz_blk_ctor(); presumably means
 * "no offset assigned yet" (TODO confirm with the code that sets it).
 */
#define OFFSET_UNDEF UINT64_MAX
/*
 * Per-block metadata.  It is also the record type kept (and returned) by the
 * de-dup block cache.
 */
struct mkuz_blk_info {
/* File offset of the block; the de-dup cache passes it to lseek(). */
uint64_t offset;
/* Number of valid bytes in the owning block's data[]. */
size_t len;
/* Block sequence number; workers copy it from the input to the output block. */
uint32_t blkno;
/* MD5 digest of the first `len' bytes of data[]; filled in only when de-dup is enabled. */
unsigned char digest[16];
};
/*
 * Sentinel "block" pointers that are never dereferenced.  A worker that
 * dequeues MKUZ_BLK_EOF puts it back on the work queue for its peers and
 * exits.  MKUZ_BLK_MORE is not used in the code visible here; presumably a
 * "more data to come" marker (TODO confirm).
 */
#define MKUZ_BLK_EOF (void *)0x1
#define MKUZ_BLK_MORE (void *)0x2
/*
 * A data block: metadata header followed by the payload bytes.
 */
struct mkuz_blk {
/* Offset, used length, sequence number and digest of the payload. */
struct mkuz_blk_info info;
/* Allocated capacity of data[] in bytes, as requested from mkuz_blk_ctor(). */
size_t alen;
/* Initialised to OFFSET_UNDEF by mkuz_blk_ctor(); meaning not visible here. */
uint64_t br_offset;
/* Payload storage (flexible array member), `alen' bytes long. */
unsigned char data[];
};
/* Allocate a block whose data[] can hold the given number of bytes. */
struct mkuz_blk *mkuz_blk_ctor(size_t);

View file

@ -0,0 +1,35 @@
/*
* Copyright (c) 2016 Maxim Sobolev <sobomax@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
struct mkuz_blk;
struct mkuz_bchain_link;
/*
 * One node of the singly linked chain used by the FIFO queue.
 */
struct mkuz_bchain_link {
/* The block carried by this node (may be a sentinel such as MKUZ_BLK_EOF). */
struct mkuz_blk *this;
/*
 * Next node towards the enqueue end: the queue sets the `prev' of the
 * current newest node to each newly added node and dequeues by following
 * `prev' from the oldest node.  NULL on the newest node.
 */
struct mkuz_bchain_link *prev;
};

View file

@ -22,97 +22,107 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <err.h>
#include <md5.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#if defined(MKUZ_DEBUG)
# include <assert.h>
# include <stdio.h>
#endif
#include "mkuz_blockcache.h"
#include "mkuz_blk.h"
struct mkuz_blkcache {
struct mkuz_blkcache_hit hit;
off_t data_offset;
unsigned char digest[16];
struct mkuz_blkcache *next;
struct mkuz_blkcache_itm {
struct mkuz_blk_info hit;
struct mkuz_blkcache_itm *next;
};
static struct mkuz_blkcache blkcache;
static struct mkuz_blkcache {
struct mkuz_blkcache_itm first[256];
} blkcache;
static int
verify_match(int fd, off_t data_offset, void *data, ssize_t len,
struct mkuz_blkcache *bcep)
verify_match(int fd, const struct mkuz_blk *cbp, struct mkuz_blkcache_itm *bcep)
{
void *vbuf;
ssize_t rlen;
int rval;
rval = -1;
vbuf = malloc(len);
vbuf = malloc(cbp->info.len);
if (vbuf == NULL) {
goto e0;
}
if (lseek(fd, bcep->data_offset, SEEK_SET) < 0) {
if (lseek(fd, bcep->hit.offset, SEEK_SET) < 0) {
goto e1;
}
rlen = read(fd, vbuf, len);
if (rlen != len) {
rlen = read(fd, vbuf, cbp->info.len);
if (rlen < 0 || (unsigned)rlen != cbp->info.len) {
goto e2;
}
rval = (memcmp(data, vbuf, len) == 0) ? 1 : 0;
rval = (memcmp(cbp->data, vbuf, cbp->info.len) == 0) ? 1 : 0;
e2:
lseek(fd, data_offset, SEEK_SET);
lseek(fd, cbp->info.offset, SEEK_SET);
e1:
free(vbuf);
e0:
return (rval);
}
struct mkuz_blkcache_hit *
mkuz_blkcache_regblock(int fd, uint32_t blkno, off_t offset, ssize_t len,
void *data)
{
struct mkuz_blkcache *bcep;
MD5_CTX mcontext;
off_t data_offset;
unsigned char mdigest[16];
int rval;
#define I2J(x) ((intmax_t)(x))
#define U2J(x) ((uintmax_t)(x))
data_offset = lseek(fd, 0, SEEK_CUR);
if (data_offset < 0) {
return (NULL);
static unsigned char
digest_fold(const unsigned char *mdigest)
{
int i;
unsigned char rval;
rval = mdigest[0];
for (i = 1; i < 16; i++) {
rval = rval ^ mdigest[i];
}
MD5Init(&mcontext);
MD5Update(&mcontext, data, len);
MD5Final(mdigest, &mcontext);
if (blkcache.hit.len == 0) {
bcep = &blkcache;
return (rval);
}
struct mkuz_blk_info *
mkuz_blkcache_regblock(int fd, const struct mkuz_blk *bp)
{
struct mkuz_blkcache_itm *bcep;
int rval;
unsigned char h;
#if defined(MKUZ_DEBUG)
assert((unsigned)lseek(fd, 0, SEEK_CUR) == bp->info.offset);
#endif
h = digest_fold(bp->info.digest);
if (blkcache.first[h].hit.len == 0) {
bcep = &blkcache.first[h];
} else {
for (bcep = &blkcache; bcep != NULL; bcep = bcep->next) {
if (bcep->hit.len != len)
for (bcep = &blkcache.first[h]; bcep != NULL; bcep = bcep->next) {
if (bcep->hit.len != bp->info.len)
continue;
if (memcmp(mdigest, bcep->digest, sizeof(mdigest)) == 0) {
if (memcmp(bp->info.digest, bcep->hit.digest,
sizeof(bp->info.digest)) == 0) {
break;
}
}
if (bcep != NULL) {
rval = verify_match(fd, data_offset, data, len, bcep);
rval = verify_match(fd, bp, bcep);
if (rval == 1) {
#if defined(MKUZ_DEBUG)
fprintf(stderr, "cache hit %d, %d, %d\n",
(int)bcep->hit.offset, (int)data_offset, (int)len);
fprintf(stderr, "cache hit %jd, %jd, %jd, %jd\n",
I2J(bcep->hit.blkno), I2J(bcep->hit.offset),
I2J(bp->info.offset), I2J(bp->info.len));
#endif
return (&bcep->hit);
}
@ -126,17 +136,13 @@ mkuz_blkcache_regblock(int fd, uint32_t blkno, off_t offset, ssize_t len,
warn("verify_match");
return (NULL);
}
bcep = malloc(sizeof(struct mkuz_blkcache));
bcep = malloc(sizeof(struct mkuz_blkcache_itm));
if (bcep == NULL)
return (NULL);
memset(bcep, '\0', sizeof(struct mkuz_blkcache));
bcep->next = blkcache.next;
blkcache.next = bcep;
memset(bcep, '\0', sizeof(struct mkuz_blkcache_itm));
bcep->next = blkcache.first[h].next;
blkcache.first[h].next = bcep;
}
memcpy(bcep->digest, mdigest, sizeof(mdigest));
bcep->data_offset = data_offset;
bcep->hit.offset = offset;
bcep->hit.len = len;
bcep->hit.blkno = blkno;
bcep->hit = bp->info;
return (NULL);
}

View file

@ -26,11 +26,6 @@
* $FreeBSD$
*/
struct mkuz_blkcache_hit {
uint64_t offset;
ssize_t len;
uint32_t blkno;
};
struct mkuz_blk;
struct mkuz_blkcache_hit *mkuz_blkcache_regblock(int, uint32_t, off_t, ssize_t,
void *);
struct mkuz_blk_info *mkuz_blkcache_regblock(int, const struct mkuz_blk *);

40
usr.bin/mkuzip/mkuz_cfg.h Normal file
View file

@ -0,0 +1,40 @@
/*
* Copyright (c) 2016 Maxim Sobolev <sobomax@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
struct mkuz_conveyor;
/*
 * Run-time configuration shared with the compression workers.
 */
struct mkuz_cfg {
/* Presumably the input file descriptor (TODO confirm against mkuzip.c). */
int fdr;
/* Presumably the output file descriptor (TODO confirm against mkuzip.c). */
int fdw;
/* Verbosity setting; not read by the code visible here. */
int verbose;
/* When 0, workers detect all-zero input blocks and emit an empty block instead of compressing. */
int no_zcomp;
/* When non-zero, workers compute an MD5 digest of every compressed block. */
int en_dedup;
/* Number of worker threads the conveyor creates. */
int nworkers;
/* Block (cluster) size handed to the format's f_init(). */
int blksz;
/* Compression back-end: provides f_init() and f_compress(). */
const struct mkuz_format *handler;
};

View file

@ -0,0 +1,129 @@
/*
* Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <err.h>
#include <inttypes.h>
#include <md5.h>
#include <pthread.h>
#include <stdlib.h>
#include <strings.h>
#if defined(MKUZ_DEBUG)
# include <stdio.h>
#endif
#include "mkuz_conveyor.h"
#include "mkuz_cfg.h"
#include "mkuzip.h"
#include "mkuz_format.h"
#include "mkuz_blk.h"
#include "mkuz_fqueue.h"
#include "mkuz_blk_chain.h"
static void compute_digest(struct mkuz_blk *);
/*
 * Start-up arguments for one worker thread.  Allocated by
 * mkuz_conveyor_ctor() and freed by the worker once it has copied both
 * pointers.
 */
struct cw_args {
/* Conveyor whose queues the worker services. */
struct mkuz_conveyor *cvp;
/* Shared configuration. */
struct mkuz_cfg *cfp;
};
/*
 * Worker thread body.
 *
 * `p' is a heap-allocated struct cw_args; it is released as soon as its two
 * pointers have been copied.  The worker sets up a private compressor
 * context and then repeatedly takes an input block from the work queue,
 * produces an output block and puts it on the results queue:
 *
 *  - when zero-block detection is on (no_zcomp == 0) and the input is
 *    entirely zero bytes, the output is an empty block;
 *  - otherwise the block is compressed and, if de-dup is enabled, its MD5
 *    digest is computed.
 *
 * The output inherits the input's block number and the input is freed.  On
 * MKUZ_BLK_EOF the marker is put back for the other workers and the thread
 * returns NULL.
 */
static void *
cworker(void *p)
{
	struct cw_args *args = p;
	struct mkuz_cfg *cfg = args->cfp;
	struct mkuz_conveyor *conv = args->cvp;
	struct mkuz_blk *in, *out;
	void *zctx;

	free(args);
	zctx = cfg->handler->f_init(cfg->blksz);

	while ((in = mkuz_fqueue_deq(conv->wrk_queue)) != MKUZ_BLK_EOF) {
		if (cfg->no_zcomp != 0 ||
		    mkuz_memvcmp(in->data, '\0', in->info.len) == 0) {
			/* Regular block: compress, optionally fingerprint */
			out = cfg->handler->f_compress(zctx, in);
			if (cfg->en_dedup != 0) {
				compute_digest(out);
			}
		} else {
			/* All zeroes block */
			out = mkuz_blk_ctor(0);
		}
		out->info.blkno = in->info.blkno;
		mkuz_fqueue_enq(conv->results, out);
		free(in);
	}

	/* Let other threads to see the EOF block */
	mkuz_fqueue_enq(conv->wrk_queue, in);
	return (NULL);
}
static void
compute_digest(struct mkuz_blk *bp)
{
MD5_CTX mcontext;
MD5Init(&mcontext);
MD5Update(&mcontext, bp->data, bp->info.len);
MD5Final(bp->info.digest, &mcontext);
}
/*
 * Build a conveyor: allocate the descriptor with one pthread_t slot per
 * worker, create the work and result queues (both with a wake-up length of
 * 1) and start cfp->nworkers threads running cworker().
 *
 * Each thread gets its own heap-allocated struct cw_args, which the thread
 * itself releases.  If a thread cannot be created the program exits via
 * errx().
 *
 * Returns the new conveyor.
 */
struct mkuz_conveyor *
mkuz_conveyor_ctor(struct mkuz_cfg *cfp)
{
	struct mkuz_conveyor *cvp;
	struct cw_args *args;
	size_t csize;
	int n;

	csize = sizeof(*cvp) + (sizeof(pthread_t) * cfp->nworkers);
	cvp = mkuz_safe_zmalloc(csize);
	cvp->wrk_queue = mkuz_fqueue_ctor(1);
	cvp->results = mkuz_fqueue_ctor(1);

	n = 0;
	while (n < cfp->nworkers) {
		args = mkuz_safe_zmalloc(sizeof(*args));
		args->cvp = cvp;
		args->cfp = cfp;
		if (pthread_create(&cvp->wthreads[n], NULL, cworker,
		    (void *)args) != 0) {
			errx(1, "mkuz_conveyor_ctor: pthread_create() failed");
			/* Not reached */
		}
		n++;
	}
	return (cvp);
}

View file

@ -0,0 +1,52 @@
/*
* Copyright (c) 2016 Maxim Sobolev <sobomax@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
struct mkuz_fifo_queue;
/* Not referenced in the code visible here; presumably a per-worker queue depth (TODO confirm). */
#define ITEMS_PER_WORKER 4
/* Upper bound on the automatically chosen number of workers (the default follows the CPU count). */
#define MAX_WORKERS_AUTO 24
/*
 * A pool of compression worker threads together with the two queues that
 * connect it to the rest of the program.
 */
struct mkuz_conveyor {
/*
 * Work items are placed in here and picked up by the workers in FIFO
 * order.
 */
struct mkuz_fifo_queue *wrk_queue;
/*
 * Results are dropped into this FIFO and the consumer is signalled to pick
 * them up.
 */
struct mkuz_fifo_queue *results;
/* Thread handles, one per worker; sized by mkuz_conveyor_ctor(). */
pthread_t wthreads[];
};
struct mkuz_cfg;
/* Create the queues and start the worker threads described by the configuration. */
struct mkuz_conveyor *mkuz_conveyor_ctor(struct mkuz_cfg *);

View file

@ -0,0 +1,39 @@
/*
* Copyright (c) 2016 Maxim Sobolev <sobomax@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
/*
 * DEFINE_RAW_METHOD(name, rtype, argtypes...) declares `name_t' as a pointer
 * to a function that takes the listed argument types and returns `rtype'.
 *
 * The argument list is forwarded with the standard C99 `...'/__VA_ARGS__
 * mechanism rather than a GNU named variadic parameter, so the header does
 * not depend on a compiler extension.
 */
#define DEFINE_RAW_METHOD(func, rval, ...) typedef rval (*func##_t)(__VA_ARGS__)
/* f_init_t: set up a compressor context for the given block size. */
DEFINE_RAW_METHOD(f_init, void *, uint32_t);
/* f_compress_t: compress one input block using a context from f_init. */
DEFINE_RAW_METHOD(f_compress, struct mkuz_blk *, void *, const struct mkuz_blk *);
/*
 * Description of one compression back-end.
 */
struct mkuz_format {
	/* Format magic string; presumably written to the image header (TODO confirm). */
	const char *magic;
	/* Default output file name suffix for this format. */
	const char *default_sufx;
	/* Creates the per-worker compressor context. */
	f_init_t f_init;
	/* Compresses a block and returns a newly allocated output block. */
	f_compress_t f_compress;
};

View file

@ -0,0 +1,214 @@
/*
* Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <err.h>
#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>
#if defined(MKUZ_DEBUG)
# include <assert.h>
#endif
#include "mkuzip.h"
#include "mkuz_fqueue.h"
#include "mkuz_conveyor.h"
#include "mkuz_blk.h"
#include "mkuz_blk_chain.h"
/*
 * Create an empty FIFO queue.
 *
 * `wakeup_len' is the queue length at (or above) which an enqueue signals
 * the condition variable.  Exits the program via errx() if the mutex or the
 * condition variable cannot be initialised.
 *
 * Returns the new queue.
 */
struct mkuz_fifo_queue *
mkuz_fqueue_ctor(int wakeup_len)
{
	struct mkuz_fifo_queue *q;
	int rc;

	q = mkuz_safe_zmalloc(sizeof(*q));
	q->wakeup_len = wakeup_len;

	rc = pthread_mutex_init(&q->mtx, NULL);
	if (rc != 0) {
		errx(1, "pthread_mutex_init() failed");
	}
	rc = pthread_cond_init(&q->cvar, NULL);
	if (rc != 0) {
		errx(1, "pthread_cond_init() failed");
	}
	return (q);
}
/*
 * Append block `bp' to the queue.
 *
 * A fresh chain link is allocated outside the lock.  Under the lock it
 * becomes the new `first' (newest) element, and one waiter is signalled
 * once the queue holds at least `wakeup_len' items.
 */
void
mkuz_fqueue_enq(struct mkuz_fifo_queue *fqp, struct mkuz_blk *bp)
{
	struct mkuz_bchain_link *link;

	link = mkuz_safe_zmalloc(sizeof(*link));
	link->this = bp;

	pthread_mutex_lock(&fqp->mtx);
	if (fqp->first == NULL) {
		/* Queue was empty: the new link is also the oldest one */
		fqp->last = link;
	} else {
		fqp->first->prev = link;
	}
	fqp->first = link;
	fqp->length++;
	if (fqp->wakeup_len <= fqp->length) {
		pthread_cond_signal(&fqp->cvar);
	}
	pthread_mutex_unlock(&fqp->mtx);
}
#if defined(NOTYET)
/*
 * Append a ready-made chain of `clen' links to the queue in one locked
 * operation.  `cip_l' is the end of the chain that gets attached to the
 * current newest element (or becomes the queue's `last' when the queue is
 * empty) and `cip_f' becomes the queue's new `first'.
 *
 * Returns the queue length after the insertion.
 */
int
mkuz_fqueue_enq_all(struct mkuz_fifo_queue *fqp, struct mkuz_bchain_link *cip_f,
    struct mkuz_bchain_link *cip_l, int clen)
{
	int newlen;

	pthread_mutex_lock(&fqp->mtx);
	if (fqp->first == NULL) {
		fqp->last = cip_l;
	} else {
		fqp->first->prev = cip_l;
	}
	fqp->first = cip_f;
	fqp->length += clen;
	newlen = fqp->length;
	if (newlen >= fqp->wakeup_len) {
		pthread_cond_signal(&fqp->cvar);
	}
	pthread_mutex_unlock(&fqp->mtx);
	return (newlen);
}
#endif
/*
 * Report whether any queued block satisfies cmp_cb(block, cap).
 *
 * Walks the chain from the oldest element (`last') towards the newest.  The
 * function does no locking itself; its caller in this file invokes it with
 * the queue mutex held.
 *
 * Returns 1 on the first match, 0 if nothing matches.
 */
static int
mkuz_fqueue_check(struct mkuz_fifo_queue *fqp, cmp_cb_t cmp_cb, void *cap)
{
	struct mkuz_bchain_link *cur;

	cur = fqp->last;
	while (cur != NULL) {
		if (cmp_cb(cur->this, cap)) {
			return (1);
		}
		cur = cur->prev;
	}
	return (0);
}
/*
 * Remove and return the oldest queued block for which cmp_cb(block, cap) is
 * non-zero, blocking until such a block is present.
 *
 * Exactly one link is taken out of the queue per call.  If several queued
 * blocks satisfy the predicate, the others stay queued in their original
 * order, so the `length' counter always matches the chain.  (Dropping every
 * matching link while returning only one of them would leak the rest and
 * leave `length' too large.)
 *
 * The link is freed here; the caller owns the returned block.
 */
struct mkuz_blk *
mkuz_fqueue_deq_when(struct mkuz_fifo_queue *fqp, cmp_cb_t cmp_cb, void *cap)
{
	struct mkuz_bchain_link *ip, *older, *mip;
	struct mkuz_blk *bp;

	pthread_mutex_lock(&fqp->mtx);
	while (fqp->last == NULL || !mkuz_fqueue_check(fqp, cmp_cb, cap)) {
		pthread_cond_wait(&fqp->cvar, &fqp->mtx);
	}
	if (cmp_cb(fqp->last->this, cap)) {
		/* The oldest element matches: plain dequeue */
		mip = fqp->last;
		fqp->last = mip->prev;
		if (fqp->last == NULL) {
#if defined(MKUZ_DEBUG)
			assert(fqp->length == 1);
#endif
			fqp->first = NULL;
		}
	} else {
#if defined(MKUZ_DEBUG)
		assert(fqp->length > 1);
#endif
		/*
		 * The match is somewhere past the oldest element.  Find the
		 * first one, remembering the link that precedes it.
		 */
		older = fqp->last;
		mip = NULL;
		for (ip = older->prev; ip != NULL; ip = ip->prev) {
			if (cmp_cb(ip->this, cap)) {
				mip = ip;
				break;
			}
			older = ip;
		}
#if defined(MKUZ_DEBUG)
		/* Guaranteed by mkuz_fqueue_check() under the same lock */
		assert(mip != NULL);
#endif
		/* Unlink the match; fix up `first' if it was the newest */
		older->prev = mip->prev;
		if (mip->prev == NULL) {
			fqp->first = older;
		}
	}
	fqp->length -= 1;
	pthread_mutex_unlock(&fqp->mtx);
	bp = mip->this;
	free(mip);
	return (bp);
}
/*
 * Remove and return the oldest block in the queue, blocking on the
 * condition variable while the queue is empty.
 *
 * The chain link is freed here; the caller owns the returned block.
 */
struct mkuz_blk *
mkuz_fqueue_deq(struct mkuz_fifo_queue *fqp)
{
	struct mkuz_bchain_link *head;
	struct mkuz_blk *blk;

	pthread_mutex_lock(&fqp->mtx);
	for (;;) {
		head = fqp->last;
		if (head != NULL) {
			break;
		}
		pthread_cond_wait(&fqp->cvar, &fqp->mtx);
	}
#if defined(MKUZ_DEBUG)
	assert(fqp->length > 0);
#endif
	fqp->last = head->prev;
	if (head->prev == NULL) {
		/* That was the only element: the queue is empty now */
#if defined(MKUZ_DEBUG)
		assert(fqp->length == 1);
#endif
		fqp->first = NULL;
	}
	fqp->length--;
	pthread_mutex_unlock(&fqp->mtx);

	blk = head->this;
	free(head);
	return blk;
}
#if defined(NOTYET)
/*
 * Detach the whole queue contents at once, blocking while the queue is
 * empty.
 *
 * Returns the chain starting at its oldest link (follow `prev' towards the
 * newest) and stores the number of links in *rclen.  The queue is left
 * empty; the caller takes over the links and their blocks.
 */
struct mkuz_bchain_link *
mkuz_fqueue_deq_all(struct mkuz_fifo_queue *fqp, int *rclen)
{
	struct mkuz_bchain_link *chain;

	pthread_mutex_lock(&fqp->mtx);
	while ((chain = fqp->last) == NULL) {
		pthread_cond_wait(&fqp->cvar, &fqp->mtx);
	}
#if defined(MKUZ_DEBUG)
	assert(fqp->length > 0);
#endif
	fqp->last = NULL;
	fqp->first = NULL;
	*rclen = fqp->length;
	fqp->length = 0;
	pthread_mutex_unlock(&fqp->mtx);
	return (chain);
}
#endif

View file

@ -0,0 +1,51 @@
/*
* Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
/*
 * FIFO queue of blocks shared between threads.  The list fields and the
 * counter are modified by the queue functions only while `mtx' is held.
 */
struct mkuz_fifo_queue {
/* Protects first, last and length. */
pthread_mutex_t mtx;
/* Signalled by the enqueue functions; dequeue functions wait on it. */
pthread_cond_t cvar;
/* Newest element (enqueue end); NULL when the queue is empty. */
struct mkuz_bchain_link *first;
/* Oldest element (dequeue end); NULL when the queue is empty. */
struct mkuz_bchain_link *last;
/* Number of elements currently queued. */
int length;
/* An enqueue signals `cvar' once length reaches this value. */
int wakeup_len;
};
struct mkuz_blk;
struct mkuz_bchain_link;
/* cmp_cb_t: predicate over a queued block; non-zero means "this block matches". */
DEFINE_RAW_METHOD(cmp_cb, int, const struct mkuz_blk *, void *);
/* Create an empty queue; the argument is the wake-up length. */
struct mkuz_fifo_queue *mkuz_fqueue_ctor(int);
/* Append a block and signal a waiter once the wake-up length is reached. */
void mkuz_fqueue_enq(struct mkuz_fifo_queue *, struct mkuz_blk *);
/* Remove and return the oldest block, waiting while the queue is empty. */
struct mkuz_blk *mkuz_fqueue_deq(struct mkuz_fifo_queue *);
/* Wait until a queued block satisfies the predicate, then remove and return a matching block. */
struct mkuz_blk *mkuz_fqueue_deq_when(struct mkuz_fifo_queue *, cmp_cb_t, void *);
#if defined(NOTYET)
/* Bulk variants: detach the whole chain / append a whole chain in one locked step. */
struct mkuz_bchain_link *mkuz_fqueue_deq_all(struct mkuz_fifo_queue *, int *);
int mkuz_fqueue_enq_all(struct mkuz_fifo_queue *, struct mkuz_bchain_link *,
struct mkuz_bchain_link *, int);
#endif

View file

@ -23,7 +23,6 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
#include <sys/cdefs.h>
@ -37,6 +36,7 @@ __FBSDID("$FreeBSD$");
#include "mkuzip.h"
#include "mkuz_lzma.h"
#include "mkuz_blk.h"
#define USED_BLOCKSIZE DEV_BSIZE
@ -44,15 +44,16 @@ struct mkuz_lzma {
lzma_filter filters[2];
lzma_options_lzma opt_lzma;
lzma_stream strm;
char *obuf;
uint32_t blksz;
};
static struct mkuz_lzma ulzma = {.strm = LZMA_STREAM_INIT};
static const lzma_stream lzma_stream_init = LZMA_STREAM_INIT;
void *
mkuz_lzma_init(uint32_t blksz)
{
struct mkuz_lzma *ulp;
if (blksz % USED_BLOCKSIZE != 0) {
errx(1, "cluster size should be multiple of %d",
USED_BLOCKSIZE);
@ -62,27 +63,34 @@ mkuz_lzma_init(uint32_t blksz)
errx(1, "cluster size is too large");
/* Not reached */
}
ulzma.obuf = mkuz_safe_malloc(blksz * 2);
ulp = mkuz_safe_zmalloc(sizeof(struct mkuz_lzma));
/* Init lzma encoder */
if (lzma_lzma_preset(&ulzma.opt_lzma, LZMA_PRESET_DEFAULT))
ulp->strm = lzma_stream_init;
if (lzma_lzma_preset(&ulp->opt_lzma, LZMA_PRESET_DEFAULT))
errx(1, "Error loading LZMA preset");
ulzma.filters[0].id = LZMA_FILTER_LZMA2;
ulzma.filters[0].options = &ulzma.opt_lzma;
ulzma.filters[1].id = LZMA_VLI_UNKNOWN;
ulp->filters[0].id = LZMA_FILTER_LZMA2;
ulp->filters[0].options = &ulp->opt_lzma;
ulp->filters[1].id = LZMA_VLI_UNKNOWN;
ulzma.blksz = blksz;
ulp->blksz = blksz;
return (ulzma.obuf);
return (void *)ulp;
}
void
mkuz_lzma_compress(const char *ibuf, uint32_t *destlen)
struct mkuz_blk *
mkuz_lzma_compress(void *p, const struct mkuz_blk *iblk)
{
lzma_ret ret;
struct mkuz_blk *rval;
struct mkuz_lzma *ulp;
ret = lzma_stream_encoder(&ulzma.strm, ulzma.filters, LZMA_CHECK_CRC32);
ulp = (struct mkuz_lzma *)p;
rval = mkuz_blk_ctor(ulp->blksz * 2);
ret = lzma_stream_encoder(&ulp->strm, ulp->filters, LZMA_CHECK_CRC32);
if (ret != LZMA_OK) {
if (ret == LZMA_MEMLIMIT_ERROR)
errx(1, "can't compress data: LZMA_MEMLIMIT_ERROR");
@ -90,21 +98,24 @@ mkuz_lzma_compress(const char *ibuf, uint32_t *destlen)
errx(1, "can't compress data: LZMA compressor ERROR");
}
ulzma.strm.next_in = ibuf;
ulzma.strm.avail_in = ulzma.blksz;
ulzma.strm.next_out = ulzma.obuf;
ulzma.strm.avail_out = ulzma.blksz * 2;
ulp->strm.next_in = iblk->data;
ulp->strm.avail_in = ulp->blksz;
ulp->strm.next_out = rval->data;
ulp->strm.avail_out = rval->alen;
ret = lzma_code(&ulzma.strm, LZMA_FINISH);
ret = lzma_code(&ulp->strm, LZMA_FINISH);
if (ret != LZMA_STREAM_END) {
/* Error */
errx(1, "lzma_code FINISH failed, code=%d, pos(in=%zd, "
"out=%zd)", ret, (ulzma.blksz - ulzma.strm.avail_in),
(ulzma.blksz * 2 - ulzma.strm.avail_out));
"out=%zd)", ret, (ulp->blksz - ulp->strm.avail_in),
(ulp->blksz * 2 - ulp->strm.avail_out));
}
lzma_end(&ulzma.strm);
#if 0
lzma_end(&ulp->strm);
#endif
*destlen = (ulzma.blksz * 2) - ulzma.strm.avail_out;
rval->info.len = rval->alen - ulp->strm.avail_out;
return (rval);
}

View file

@ -39,4 +39,4 @@
#define DEFAULT_SUFX_LZMA ".ulzma"
void *mkuz_lzma_init(uint32_t);
void mkuz_lzma_compress(const char *, uint32_t *);
struct mkuz_blk *mkuz_lzma_compress(void *, const struct mkuz_blk *);

View file

@ -0,0 +1,45 @@
/*
* Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <math.h>
#include <stdint.h>
#include <time.h>
#include "mkuz_time.h"
/*
 * Read CLOCK_MONOTONIC and return it as seconds in a double (whole seconds
 * plus the nanosecond part as a fraction).
 *
 * Returns -1 if clock_gettime() fails.
 */
double
getdtime(void)
{
	struct timespec now;
	int rc;

	rc = clock_gettime(CLOCK_MONOTONIC, &now);
	if (rc != -1) {
		return timespec2dtime(&now);
	}
	return (-1);
}

View file

@ -0,0 +1,41 @@
/*
* Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _MKUZ_TIME_H_
#define _MKUZ_TIME_H_

/* Accessors for the seconds and nanoseconds members behind a struct timespec pointer. */
#define SEC(x) ((x)->tv_sec)
#define NSEC(x) ((x)->tv_nsec)

/*
 * Convert the struct timespec pointed to by s into seconds expressed as a
 * double. The argument is expanded twice, so it must be free of side effects.
 */
#define timespec2dtime(s) ((double)SEC(s) + \
    (double)NSEC(s) / 1000000000.0)

/* Function prototypes */

/* Current monotonic time in seconds; -1 if the clock cannot be read. */
double getdtime(void);

#endif /* _MKUZ_TIME_H_ */

View file

@ -22,7 +22,6 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
#include <sys/cdefs.h>
@ -36,18 +35,18 @@ __FBSDID("$FreeBSD$");
#include "mkuzip.h"
#include "mkuz_zlib.h"
#include "mkuz_blk.h"
/* State kept by the zlib compression handler. */
struct mkuz_zlib {
char *obuf;	/* compression output buffer of oblen bytes */
uLongf oblen;	/* worst-case compressed size, from compressBound(blksz) */
uint32_t blksz;	/* cluster size given to mkuz_zlib_init(); input length for compress2() */
};
static struct mkuz_zlib uzip;
void *
mkuz_zlib_init(uint32_t blksz)
{
struct mkuz_zlib *zp;
if (blksz % DEV_BSIZE != 0) {
errx(1, "cluster size should be multiple of %d",
DEV_BSIZE);
@ -57,25 +56,32 @@ mkuz_zlib_init(uint32_t blksz)
errx(1, "cluster size is too large");
/* Not reached */
}
uzip.oblen = compressBound(blksz);
uzip.obuf = mkuz_safe_malloc(uzip.oblen);
uzip.blksz = blksz;
zp = mkuz_safe_zmalloc(sizeof(struct mkuz_zlib));
zp->oblen = compressBound(blksz);
zp->blksz = blksz;
return (uzip.obuf);
return (void *)zp;
}
/*
 * Compress one cluster with zlib at Z_BEST_COMPRESSION.
 *
 * This span shows the pre- and post-change lines of the function together.
 * In the struct mkuz_blk variant, p is the context returned by
 * mkuz_zlib_init(), zp->blksz bytes are read from iblk->data, and the result
 * is a block obtained from mkuz_blk_ctor(zp->oblen) whose info.len is the
 * compressed length. A compress2() failure ends the program via errx(1, ...).
 */
void
mkuz_zlib_compress(const char *ibuf, uint32_t *destlen)
struct mkuz_blk *
mkuz_zlib_compress(void *p, const struct mkuz_blk *iblk)
{
uLongf destlen_z;
struct mkuz_blk *rval;
struct mkuz_zlib *zp;
destlen_z = uzip.oblen;
if (compress2(uzip.obuf, &destlen_z, ibuf, uzip.blksz,
zp = (struct mkuz_zlib *)p;
rval = mkuz_blk_ctor(zp->oblen);
destlen_z = rval->alen;
if (compress2(rval->data, &destlen_z, iblk->data, zp->blksz,
Z_BEST_COMPRESSION) != Z_OK) {
errx(1, "can't compress data: compress2() "
"failed");
/* Not reached */
}
*destlen = (uint32_t)destlen_z;
rval->info.len = (uint32_t)destlen_z;
return (rval);
}

View file

@ -33,4 +33,4 @@
"mount_cd9660 /dev/`mdconfig -af $0`.uzip $1\nexit $?\n"
void *mkuz_zlib_init(uint32_t);
void mkuz_zlib_compress(const char *, uint32_t *);
struct mkuz_blk *mkuz_zlib_compress(void *, const struct mkuz_blk *);

View file

@ -38,6 +38,7 @@ class
.Op Fl v
.Op Fl o Ar outfile
.Op Fl s Ar cluster_size
.Op Fl j Ar compression_jobs
.Ar infile
.Sh DESCRIPTION
The
@ -121,6 +122,15 @@ typically around 3-5% of a final size of the compressed image.
.It Fl S
Print summary about the compression ratio as well as output
file size after file has been processed.
.It Fl j Ar compression_jobs
Specify the number of compression jobs that
.Nm
runs in parallel to speed up compression.
When this option is not specified, the number of jobs is set
to the value of the
.Va hw.ncpu
.Xr sysctl 8
variable.
.El
.Sh NOTES
The compression ratio largely depends on the cluster size used.

View file

@ -22,7 +22,6 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
#include <sys/cdefs.h>
@ -32,13 +31,17 @@ __FBSDID("$FreeBSD$");
#include <sys/disk.h>
#include <sys/endian.h>
#include <sys/param.h>
#include <sys/sysctl.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <netinet/in.h>
#include <assert.h>
#include <ctype.h>
#include <err.h>
#include <fcntl.h>
#include <pthread.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -49,21 +52,15 @@ __FBSDID("$FreeBSD$");
#include "mkuz_blockcache.h"
#include "mkuz_zlib.h"
#include "mkuz_lzma.h"
#define DEFINE_RAW_METHOD(func, rval, args...) typedef rval (*func##_t)(args)
#include "mkuz_blk.h"
#include "mkuz_cfg.h"
#include "mkuz_conveyor.h"
#include "mkuz_format.h"
#include "mkuz_fqueue.h"
#include "mkuz_time.h"
#define DEFAULT_CLSTSIZE 16384
DEFINE_RAW_METHOD(f_init, void *, uint32_t);
DEFINE_RAW_METHOD(f_compress, void, const char *, uint32_t *);
struct mkuz_format {
const char *magic;
const char *default_sufx;
f_init_t f_init;
f_compress_t f_compress;
};
static struct mkuz_format uzip_fmt = {
.magic = CLOOP_MAGIC_ZLIB,
.default_sufx = DEFAULT_SUFX_ZLIB,
@ -78,41 +75,65 @@ static struct mkuz_format ulzma_fmt = {
.f_compress = &mkuz_lzma_compress
};
static char *readblock(int, char *, u_int32_t);
static struct mkuz_blk *readblock(int, u_int32_t);
static void usage(void);
static void cleanup(void);
static int memvcmp(const void *, unsigned char, size_t);
int mkuz_memvcmp(const void *, unsigned char, size_t);
static char *cleanfile = NULL;
/*
 * Predicate for mkuz_fqueue_deq_when(): returns non-zero when the block's
 * number equals the uint32_t value that p points to.
 */
static int
cmp_blkno(const struct mkuz_blk *bp, void *p)
{
	const uint32_t *wanted = p;

	if (bp->info.blkno != *wanted) {
		return (0);
	}
	return (1);
}
int main(int argc, char **argv)
{
char *iname, *oname, *obuf, *ibuf;
struct mkuz_cfg cfs;
char *iname, *oname;
uint64_t *toc;
int fdr, fdw, i, opt, verbose, no_zcomp, tmp, en_dedup;
int i, io, opt, tmp;
struct {
int en;
FILE *f;
} summary;
struct iovec iov[2];
struct stat sb;
uint32_t destlen;
uint64_t offset, last_offset;
struct cloop_header hdr;
struct mkuz_blkcache_hit *chit;
const struct mkuz_format *handler;
struct mkuz_conveyor *cvp;
void *c_ctx;
struct mkuz_blk_info *chit;
size_t ncpusz, ncpu;
double st, et;
st = getdtime();
ncpusz = sizeof(size_t);
if (sysctlbyname("hw.ncpu", &ncpu, &ncpusz, NULL, 0) < 0) {
ncpu = 1;
} else if (ncpu > MAX_WORKERS_AUTO) {
ncpu = MAX_WORKERS_AUTO;
}
memset(&hdr, 0, sizeof(hdr));
hdr.blksz = DEFAULT_CLSTSIZE;
cfs.blksz = DEFAULT_CLSTSIZE;
oname = NULL;
verbose = 0;
no_zcomp = 0;
en_dedup = 0;
cfs.verbose = 0;
cfs.no_zcomp = 0;
cfs.en_dedup = 0;
summary.en = 0;
summary.f = stderr;
handler = &uzip_fmt;
cfs.handler = &uzip_fmt;
cfs.nworkers = ncpu;
struct mkuz_blk *iblk, *oblk;
while((opt = getopt(argc, argv, "o:s:vZdLS")) != -1) {
while((opt = getopt(argc, argv, "o:s:vZdLSj:")) != -1) {
switch(opt) {
case 'o':
oname = optarg;
@ -125,23 +146,23 @@ int main(int argc, char **argv)
optarg);
/* Not reached */
}
hdr.blksz = tmp;
cfs.blksz = tmp;
break;
case 'v':
verbose = 1;
cfs.verbose = 1;
break;
case 'Z':
no_zcomp = 1;
cfs.no_zcomp = 1;
break;
case 'd':
en_dedup = 1;
cfs.en_dedup = 1;
break;
case 'L':
handler = &ulzma_fmt;
cfs.handler = &ulzma_fmt;
break;
case 'S':
@ -149,6 +170,16 @@ int main(int argc, char **argv)
summary.f = stdout;
break;
case 'j':
tmp = atoi(optarg);
if (tmp <= 0) {
errx(1, "invalid number of compression threads"
" specified: %s", optarg);
/* Not reached */
}
cfs.nworkers = tmp;
break;
default:
usage();
/* Not reached */
@ -162,27 +193,25 @@ int main(int argc, char **argv)
/* Not reached */
}
strcpy(hdr.magic, handler->magic);
strcpy(hdr.magic, cfs.handler->magic);
if (en_dedup != 0) {
if (cfs.en_dedup != 0) {
hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3;
hdr.magic[CLOOP_OFS_COMPR] =
tolower(hdr.magic[CLOOP_OFS_COMPR]);
}
obuf = handler->f_init(hdr.blksz);
c_ctx = cfs.handler->f_init(cfs.blksz);
iname = argv[0];
if (oname == NULL) {
asprintf(&oname, "%s%s", iname, handler->default_sufx);
asprintf(&oname, "%s%s", iname, cfs.handler->default_sufx);
if (oname == NULL) {
err(1, "can't allocate memory");
/* Not reached */
}
}
ibuf = mkuz_safe_malloc(hdr.blksz);
signal(SIGHUP, exit);
signal(SIGINT, exit);
signal(SIGTERM, exit);
@ -190,19 +219,19 @@ int main(int argc, char **argv)
signal(SIGXFSZ, exit);
atexit(cleanup);
fdr = open(iname, O_RDONLY);
if (fdr < 0) {
cfs.fdr = open(iname, O_RDONLY);
if (cfs.fdr < 0) {
err(1, "open(%s)", iname);
/* Not reached */
}
if (fstat(fdr, &sb) != 0) {
if (fstat(cfs.fdr, &sb) != 0) {
err(1, "fstat(%s)", iname);
/* Not reached */
}
if (S_ISCHR(sb.st_mode)) {
off_t ms;
if (ioctl(fdr, DIOCGMEDIASIZE, &ms) < 0) {
if (ioctl(cfs.fdr, DIOCGMEDIASIZE, &ms) < 0) {
err(1, "ioctl(DIOCGMEDIASIZE)");
/* Not reached */
}
@ -212,18 +241,18 @@ int main(int argc, char **argv)
iname);
exit(1);
}
hdr.nblocks = sb.st_size / hdr.blksz;
if ((sb.st_size % hdr.blksz) != 0) {
if (verbose != 0)
hdr.nblocks = sb.st_size / cfs.blksz;
if ((sb.st_size % cfs.blksz) != 0) {
if (cfs.verbose != 0)
fprintf(stderr, "file size is not multiple "
"of %d, padding data\n", hdr.blksz);
"of %d, padding data\n", cfs.blksz);
hdr.nblocks++;
}
toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc));
fdw = open(oname, (en_dedup ? O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT,
cfs.fdw = open(oname, (cfs.en_dedup ? O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT,
S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
if (fdw < 0) {
if (cfs.fdw < 0) {
err(1, "open(%s)", oname);
/* Not reached */
}
@ -237,35 +266,32 @@ int main(int argc, char **argv)
offset = iov[0].iov_len + iov[1].iov_len;
/* Reserve space for header */
lseek(fdw, offset, SEEK_SET);
lseek(cfs.fdw, offset, SEEK_SET);
if (verbose != 0)
if (cfs.verbose != 0) {
fprintf(stderr, "data size %ju bytes, number of clusters "
"%u, index length %zu bytes\n", sb.st_size,
hdr.nblocks, iov[1].iov_len);
}
cvp = mkuz_conveyor_ctor(&cfs);
last_offset = 0;
for(i = 0; i == 0 || ibuf != NULL; i++) {
ibuf = readblock(fdr, ibuf, hdr.blksz);
if (ibuf != NULL) {
if (no_zcomp == 0 && \
memvcmp(ibuf, '\0', hdr.blksz) != 0) {
/* All zeroes block */
destlen = 0;
} else {
handler->f_compress(ibuf, &destlen);
}
} else {
destlen = DEV_BSIZE - (offset % DEV_BSIZE);
memset(obuf, 0, destlen);
if (verbose != 0)
fprintf(stderr, "padding data with %lu bytes "
"so that file size is multiple of %d\n",
(u_long)destlen, DEV_BSIZE);
iblk = oblk = NULL;
for(i = io = 0; iblk != MKUZ_BLK_EOF; i++) {
iblk = readblock(cfs.fdr, cfs.blksz);
mkuz_fqueue_enq(cvp->wrk_queue, iblk);
if (iblk != MKUZ_BLK_EOF &&
(i < (cfs.nworkers * ITEMS_PER_WORKER))) {
continue;
}
if (destlen > 0 && en_dedup != 0) {
chit = mkuz_blkcache_regblock(fdw, i, offset, destlen,
obuf);
drain:
oblk = mkuz_fqueue_deq_when(cvp->results, cmp_blkno, &io);
assert(oblk->info.blkno == (unsigned)io);
oblk->info.offset = offset;
chit = NULL;
if (cfs.en_dedup != 0 && oblk->info.len > 0) {
chit = mkuz_blkcache_regblock(cfs.fdw, oblk);
/*
* There should be at least one non-empty block
* between us and the backref'ed offset, otherwise
@ -276,72 +302,107 @@ int main(int argc, char **argv)
if (chit != NULL && chit->offset == last_offset) {
chit = NULL;
}
} else {
chit = NULL;
}
if (chit != NULL) {
toc[i] = htobe64(chit->offset);
toc[io] = htobe64(chit->offset);
oblk->info.len = 0;
} else {
if (destlen > 0 && write(fdw, obuf, destlen) < 0) {
if (oblk->info.len > 0 && write(cfs.fdw, oblk->data,
oblk->info.len) < 0) {
err(1, "write(%s)", oname);
/* Not reached */
}
toc[i] = htobe64(offset);
toc[io] = htobe64(offset);
last_offset = offset;
offset += destlen;
offset += oblk->info.len;
}
if (ibuf != NULL && verbose != 0) {
if (cfs.verbose != 0) {
fprintf(stderr, "cluster #%d, in %u bytes, "
"out len=%lu offset=%lu", i, hdr.blksz,
chit == NULL ? (u_long)destlen : 0,
(u_long)be64toh(toc[i]));
"out len=%lu offset=%lu", io, cfs.blksz,
(u_long)oblk->info.len, (u_long)be64toh(toc[io]));
if (chit != NULL) {
fprintf(stderr, " (backref'ed to #%d)",
chit->blkno);
}
fprintf(stderr, "\n");
}
free(oblk);
io += 1;
if (iblk == MKUZ_BLK_EOF) {
if (io < i)
goto drain;
/* Last block, see if we need to add some padding */
if ((offset % DEV_BSIZE) == 0)
continue;
oblk = mkuz_blk_ctor(DEV_BSIZE - (offset % DEV_BSIZE));
oblk->info.blkno = io;
oblk->info.len = oblk->alen;
if (cfs.verbose != 0) {
fprintf(stderr, "padding data with %lu bytes "
"so that file size is multiple of %d\n",
(u_long)oblk->alen, DEV_BSIZE);
}
mkuz_fqueue_enq(cvp->results, oblk);
goto drain;
}
}
close(fdr);
if (verbose != 0 || summary.en != 0)
close(cfs.fdr);
if (cfs.verbose != 0 || summary.en != 0) {
et = getdtime();
fprintf(summary.f, "compressed data to %ju bytes, saved %lld "
"bytes, %.2f%% decrease.\n", offset,
"bytes, %.2f%% decrease, %.2f bytes/sec.\n", offset,
(long long)(sb.st_size - offset),
100.0 * (long long)(sb.st_size - offset) /
(float)sb.st_size);
(float)sb.st_size, (float)sb.st_size / (et - st));
}
/* Convert to big endian */
hdr.blksz = htonl(hdr.blksz);
hdr.blksz = htonl(cfs.blksz);
hdr.nblocks = htonl(hdr.nblocks);
/* Write headers into pre-allocated space */
lseek(fdw, 0, SEEK_SET);
if (writev(fdw, iov, 2) < 0) {
lseek(cfs.fdw, 0, SEEK_SET);
if (writev(cfs.fdw, iov, 2) < 0) {
err(1, "writev(%s)", oname);
/* Not reached */
}
cleanfile = NULL;
close(fdw);
close(cfs.fdw);
exit(0);
}
/*
 * Read the next cluster of at most clstsize bytes from fd.
 *
 * This span shows the pre- and post-change lines of the function together.
 * In the struct mkuz_blk variant each call takes a block from
 * mkuz_blk_ctor(clstsize), stamps it with the next value of a static block
 * counter and with the file position reported by lseek() before the read,
 * and stores the number of bytes read in info.len. When read() returns 0
 * the block is freed and MKUZ_BLK_EOF is returned instead. lseek() and
 * read() failures end the program via err(1, ...).
 *
 * NOTE(review): the bzero() of the input buffer belongs to the char *
 * variant only; whether a short final read is zero-padded now depends on
 * mkuz_blk_ctor() -- confirm it zero-fills the data area.
 */
static char *
readblock(int fd, char *ibuf, u_int32_t clstsize)
static struct mkuz_blk *
readblock(int fd, u_int32_t clstsize)
{
int numread;
struct mkuz_blk *rval;
static int blockcnt;
off_t cpos;
bzero(ibuf, clstsize);
numread = read(fd, ibuf, clstsize);
rval = mkuz_blk_ctor(clstsize);
rval->info.blkno = blockcnt;
blockcnt += 1;
cpos = lseek(fd, 0, SEEK_CUR);
if (cpos < 0) {
err(1, "readblock: lseek() failed");
/* Not reached */
}
rval->info.offset = cpos;
numread = read(fd, rval->data, clstsize);
if (numread < 0) {
err(1, "read() failed");
err(1, "readblock: read() failed");
/* Not reached */
}
if (numread == 0) {
return NULL;
free(rval);
return MKUZ_BLK_EOF;
}
return ibuf;
rval->info.len = numread;
return rval;
}
static void
@ -349,7 +410,7 @@ usage(void)
{
fprintf(stderr, "usage: mkuzip [-vZdLS] [-o outfile] [-s cluster_size] "
"infile\n");
"[-j ncompr] infile\n");
exit(1);
}
@ -366,6 +427,16 @@ mkuz_safe_malloc(size_t size)
return retval;
}
/*
 * Allocate size bytes through mkuz_safe_malloc() and clear them to zero
 * before handing the memory to the caller.
 */
void *
mkuz_safe_zmalloc(size_t size)
{
	void *zeroed;

	zeroed = mkuz_safe_malloc(size);
	memset(zeroed, 0, size);
	return (zeroed);
}
static void
cleanup(void)
{
@ -374,8 +445,8 @@ cleanup(void)
unlink(cleanfile);
}
static int
memvcmp(const void *memory, unsigned char val, size_t size)
int
mkuz_memvcmp(const void *memory, unsigned char val, size_t size)
{
const u_char *mm;

View file

@ -26,4 +26,8 @@
* $FreeBSD$
*/
#define DEFINE_RAW_METHOD(func, rval, args...) typedef rval (*func##_t)(args)
void *mkuz_safe_malloc(size_t);
void *mkuz_safe_zmalloc(size_t);
int mkuz_memvcmp(const void *, unsigned char, size_t);