git/ewah/ewok_rlw.h
Vicent Marti e1273106f6 ewah: compressed bitmap implementation
EWAH is a word-aligned compressed variant of a bitset (i.e. a data
structure that acts as a 0-indexed boolean array for many entries).

It uses a 64-bit run-length encoding (RLE) compression scheme,
trading some compression for better processing speed.

The goal of this word-aligned implementation is not to achieve
the best compression, but rather to improve query processing time.
As it stands right now, this EWAH implementation will always be more
efficient storage-wise than its uncompressed alternative.

EWAH arrays will be used as the on-disk format to store reachability
bitmaps for all objects in a repository while keeping reasonable sizes,
in the same way that JGit does.

This EWAH implementation is a mostly straightforward port of the
original `javaewah` library that JGit currently uses. The library is
self-contained and has been embedded whole (4 files) inside the `ewah`
folder to ease redistribution.

The library is re-licensed under the GPLv2 with the permission of Daniel
Lemire, the original author. The source code for the C version can
be found on GitHub:

	https://github.com/vmg/libewok

The original Java implementation can also be found on GitHub:

	https://github.com/lemire/javaewah

[jc: stripped debug-only code per Peff's $gmane/239768]

Signed-off-by: Vicent Marti <tanoku@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Helped-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-12-30 12:17:20 -08:00

115 lines
3.1 KiB
C

/**
* Copyright 2013, GitHub, Inc
* Copyright 2009-2013, Daniel Lemire, Cliff Moon,
* David McIntosh, Robert Becho, Google Inc. and Veronika Zenz
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef __EWOK_RLW_H__
#define __EWOK_RLW_H__
/*
 * Bit layout of a run-length word (RLW), as read and written by the
 * accessors in this header, counting from the least significant bit:
 *   - bit 0: the run bit;
 *   - the next RLW_RUNNING_BITS bits: the running length;
 *   - the remaining RLW_LITERAL_BITS bits: the literal word count.
 *
 * NOTE(review): eword_t is not declared in this header, so it has to be in
 * scope before inclusion; presumably it is an unsigned integer type.
 */
/* Width in bits of the running-length field: four bits per byte of eword_t. */
#define RLW_RUNNING_BITS (sizeof(eword_t) * 4)
/* Width in bits of the literal-count field: what is left of eword_t after
 * the run bit and the running-length field (assuming 8-bit bytes). */
#define RLW_LITERAL_BITS (sizeof(eword_t) * 8 - 1 - RLW_RUNNING_BITS)
/* Largest value that fits in the running-length field. */
#define RLW_LARGEST_RUNNING_COUNT (((eword_t)1 << RLW_RUNNING_BITS) - 1)
/* Largest value that fits in the literal-count field. */
#define RLW_LARGEST_LITERAL_COUNT (((eword_t)1 << RLW_LITERAL_BITS) - 1)
/* In-place mask of the running-length field (shifted past the run bit). */
#define RLW_LARGEST_RUNNING_COUNT_SHIFT (RLW_LARGEST_RUNNING_COUNT << 1)
/* Mask of the low RLW_RUNNING_BITS + 1 bits: run bit plus running length. */
#define RLW_RUNNING_LEN_PLUS_BIT (((eword_t)1 << (RLW_RUNNING_BITS + 1)) - 1)
/*
 * Return the run bit of the run-length word pointed to by `word`:
 * 1 when the least significant bit is set, 0 otherwise.
 *
 * This is `static inline` like every other helper in this header. A plain
 * `static` definition in a header makes compilers warn about an unused
 * function in each translation unit that includes the header without
 * calling it.
 */
static inline int rlw_get_run_bit(const eword_t *word)
{
	return *word & (eword_t)1;
}
/*
 * Force the run bit (bit 0) of `*word` to match `b`: set it when `b` is
 * non-zero, clear it when `b` is zero. All other bits are left untouched.
 */
static inline void rlw_set_run_bit(eword_t *word, int b)
{
	const eword_t run_bit = (eword_t)1;

	if (!b) {
		*word &= (eword_t)~run_bit;
		return;
	}
	*word |= run_bit;
}
/*
 * Flip the run bit (bit 0) of `*word`; every other bit keeps its value.
 */
static inline void rlw_xor_run_bit(eword_t *word)
{
	*word ^= (eword_t)1;
}
/*
 * Store `l` in the running-length field of `*word`.
 *
 * Only the bits covered by RLW_LARGEST_RUNNING_COUNT_SHIFT are rewritten;
 * the run bit and the bits above the field are preserved. Bits of `l` that
 * do not fit in the field are dropped.
 */
static inline void rlw_set_running_len(eword_t *word, eword_t l)
{
	const eword_t field_mask = RLW_LARGEST_RUNNING_COUNT_SHIFT;
	const eword_t preserved = *word & ~field_mask;

	*word = preserved | ((l << 1) & field_mask);
}
/*
 * Extract the running-length field of `*word`: drop the run bit, then keep
 * the low RLW_RUNNING_BITS bits.
 */
static inline eword_t rlw_get_running_len(const eword_t *word)
{
	const eword_t without_run_bit = *word >> 1;

	return without_run_bit & RLW_LARGEST_RUNNING_COUNT;
}
/*
 * Extract the literal-count field of `*word`, i.e. everything above the run
 * bit and the running-length field.
 */
static inline eword_t rlw_get_literal_words(const eword_t *word)
{
	const eword_t value = *word;

	return value >> (RLW_RUNNING_BITS + 1);
}
/*
 * Store `l` in the literal-count field of `*word`.
 *
 * The low RLW_RUNNING_BITS + 1 bits (run bit and running length) are
 * preserved; the bits above them are replaced by `l`. Bits of `l` that do
 * not fit are shifted out.
 */
static inline void rlw_set_literal_words(eword_t *word, eword_t l)
{
	const eword_t low_mask = RLW_RUNNING_LEN_PLUS_BIT;

	*word = (*word & low_mask) | (l << (RLW_RUNNING_BITS + 1));
}
/*
 * Return the sum of the running length and the literal word count stored in
 * the run-length word `*self`.
 */
static inline eword_t rlw_size(const eword_t *self)
{
	const eword_t run_words = rlw_get_running_len(self);
	const eword_t literal_words = rlw_get_literal_words(self);

	return run_words + literal_words;
}
/*
 * State for the rlwit_* functions declared in this header.
 *
 * NOTE(review): the code that fills in these fields is not part of this
 * header; the field descriptions are inferred from names and from the two
 * inline helpers that read them, and should be confirmed against the
 * implementation.
 */
struct rlw_iterator {
	/* presumably the word array being iterated, and its length in words */
	const eword_t *buffer;
	size_t size;
	/* position within the buffer; read by rlwit_literal_words() */
	size_t pointer;
	/* presumably the index of the current word's first literal word */
	size_t literal_word_start;
	/* presumably a decoded copy of the current run-length word */
	struct {
		const eword_t *word;
		/* added to running_len by rlwit_word_size() */
		int literal_words;
		int running_len;
		int literal_word_offset;
		int running_bit;
	} rlw;
};
/*
 * Iterator entry points; their definitions live outside this header.
 * NOTE(review): the one-line descriptions below are inferred from names and
 * signatures only; confirm them against the implementation.
 */
/* Presumably prepares `it` for walking the run-length words of `bitmap`. */
void rlwit_init(struct rlw_iterator *it, struct ewah_bitmap *bitmap);
/* Presumably advances `it` past its next `x` words. */
void rlwit_discard_first_words(struct rlw_iterator *it, size_t x);
/* Presumably moves up to `max` words from `it` into `out`, inverted when
 * `negate` is non-zero, and returns a word count. */
size_t rlwit_discharge(
struct rlw_iterator *it, struct ewah_bitmap *out, size_t max, int negate);
/* Presumably a variant of rlwit_discharge() that emits empty words to `out`. */
void rlwit_discharge_empty(struct rlw_iterator *it, struct ewah_bitmap *out);
/*
 * Return the sum of the iterator's current `rlw.running_len` and
 * `rlw.literal_words`. The addition is done in `int` and the result is then
 * converted to `size_t`.
 */
static inline size_t rlwit_word_size(struct rlw_iterator *it)
{
	const int run_words = it->rlw.running_len;
	const int literal_words = it->rlw.literal_words;

	return run_words + literal_words;
}
/*
 * Return the iterator's `pointer` minus its current `rlw.literal_words`.
 * NOTE(review): presumably this is the buffer index at which the current
 * word's literal words begin; confirm against the callers.
 */
static inline size_t rlwit_literal_words(struct rlw_iterator *it)
{
	const size_t position = it->pointer;

	return position - it->rlw.literal_words;
}
#endif /* __EWOK_RLW_H__ */