bmz.c File Reference

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
#include <time.h>
#include "bmz-internal.h"
#include "../lzo/minilzo.h"

Include dependency graph for bmz.c:

Go to the source code of this file.

Classes

struct  BmLutEntry
struct  BmLut

Defines

#define BM_B1   257
 Copyright (C) 2007 Luke Lu (Zvents, Inc.
#define BM_B2   277
#define BM_M1   0xffff
#define BM_M2   (0xffff - 4)
#define BM_MASK16   0xffff
#define BM_MASK24   0xffffff
#define BM_MASK32   0xffffffff
#define BM_MASKSZ   BM_MASK32
#define BM_ESC   0xfe
#define BM_MAX_LEN   0xfdffffffffffull
#define BM_MAX_1B   0xfd
#define BM_MAX_2B   0xfdff
#define BM_MAX_3B   0xfdffff
#define BM_MAX_4B   0xfdfffffful
#define BM_MAX_5B   0xfdffffffffull
#define BM_MAX_V1B   0x7f
#define BM_MAX_V2B   0x3fff
#define BM_MAX_V3B   0x1fffff
#define BM_MAX_V4B   0xfffffff
#define BM_MAX_V5B   0x7ffffffffull
#define BM_MAX_V6B   0x3ffffffffffull
#define BM_COLLISION_ABORT_THRESH   1
#define BM_ABORT_COLLISIONS   ((BM_COLLISION_ABORT_THRESH + 1) * collision_thresh + 1)
#define BM_COLOR_DBG   "\x1b[31m"
#define BM_COLOR_ALT   "\x1b[32m"
#define BM_COLOR_END   "\x1b[m"
#define BM_NPOS   ((size_t)-1)
#define BM_ALIGN(_mem_, _n_)   (Byte *)(_mem_) + _n_ - (((size_t)(_mem_))%(_n_))
#define BM_LOG(_level_, _fmt_,...)
#define BM_LOG_(_level_, _buf_, _len_)
#define BM_DIE(_fmt_,...)
#define BM_CHECK(_cond_)
#define BM_LOOKUP(_lut_, _hash_, _entry_)
#define R_HASH_MOD_BODY(_int_type_)
#define POW_MOD_BODY(_pow_fun_, _int_type_)
#define UPDATE_HASH_MOD_BODY
#define R_HASH_MASK_BODY(int_type)
#define POW_MASK_BODY(_pow_fun_, _int_type_)
#define UPDATE_HASH_MASK_BODY
#define BM_HASH_CHECK_BODY(_init_hash_, _rehash_, _update_hash_)
#define BM_HASH_BENCH_BODY(_init_hash_, _update_hash_)
#define BM_LUT_BENCH_BODY(_init_hash_, _update_hash_)
#define BM_NEED_IN(_n_)   if ((int)(in_end - ip) < (int)(_n_)) goto input_overrun
#define BM_NEED_OUT(_n_)   if ((int)(out_end - op) < (int)(_n_)) goto output_overrun
#define BM_ENCODE_OUTPUT(_op_, _ip_, _ip_end_)
#define BM_ENCODE_FINAL_OUTPUT(_op_, _ip_, _ip_end_)
#define BM_ENCODE_PASS
#define BM_ENCODE_INT(_op_, _n_, _width_)
#define BM_ENCODE_VINT(_op_, _n_, _width_)
#define BM_DECODE_VINT(_n_)
#define BM_INT_WIDTH(_n_)
#define BM_VINT_WIDTH(_n_)
#define BM_ENCODE_DELTA(_op_, _ipos_, _pos_, _len_)
#define BM_HANDLE_COLLISIONS(_randomize_, _init_hash_)
#define BM_ENCODE_STAT(_msg_)
#define BM_ENCODE_BODY(_init_hash_, _update_hash_, _randomize_)
#define BM_LZ_MAX(_len_)   ((_len_) / 16 + 64 + 3)
#define BMZ_PACK_BODY(_bm_pack_)
#define BM_DECODE_POS(_cpos_, _n_)
#define BM_DECODE_LEN(_n_)   BM_DECODE_VINT(_n_)

Typedefs

typedef uint8_t Byte
typedef uint32_t UInt32
typedef uint64_t UInt64
typedef int64_t Int64
typedef int32_t Int32
typedef long long unsigned Llu

Functions

static void builtin_out (const char *buf, size_t len)
static HT_NORETURN void builtin_die (const char *msg)
int bmz_set_verbosity (int verbosity)
 Set the verbosity of library for testing and debugging.
BmzOutProc bmz_set_out_proc (BmzOutProc proc)
 Set messaging/logging procedure.
int bmz_set_collision_thresh (int thresh)
static size_t random_prime (size_t excluded)
static size_t bm_lut_size (size_t n)
static void init_bmlut (BmLut *table, size_t n, void *work_mem)
static UInt64 r_hash_mod (const Byte *buf, size_t len, size_t b, UInt64 m)
static UInt32 r_hash_mod32 (const Byte *buf, size_t len, UInt32 b, UInt32 m)
static UInt32 r_hash_mod16x2 (const Byte *buf, size_t len, UInt32 b1, UInt32 b2, UInt32 m1, UInt32 m2)
static UInt64 pow_mod (UInt64 x, UInt64 n, UInt64 m)
static UInt32 pow_mod32 (UInt32 x, UInt32 n, UInt32 m)
static Int64 update_hash_mod (Int64 h, Byte in, Byte out, Int64 pow_n, Int64 b, Int64 m)
static Int32 update_hash_mod32 (Int32 h, Byte in, Byte out, Int32 pow_n, Int32 b, Int32 m)
static UInt32 update_hash_mod16x2 (UInt32 h, int in, int out, UInt32 pow1, UInt32 pow2, UInt32 b1, UInt32 b2, UInt32 m1, UInt32 m2)
static UInt64 r_hash_mask (const Byte *buf, size_t len, UInt64 b, UInt64 m)
static UInt32 r_hash_mask32 (const Byte *buf, size_t len, UInt32 b, UInt32 m)
static UInt32 r_hash_mask16x2 (const Byte *buf, size_t len, UInt32 b1, UInt32 b2)
static UInt64 r_hash_mask32x2 (const Byte *buf, size_t len, UInt64 b1, UInt64 b2)
static UInt64 pow_mask (UInt64 x, UInt64 n, UInt64 m)
static UInt32 pow_mask32 (UInt32 x, UInt32 n, UInt32 m)
static UInt64 update_hash_mask (UInt64 h, int in, int out, UInt64 pow_n, UInt64 b, UInt64 m)
static UInt32 update_hash_mask32 (UInt32 h, int in, int out, UInt32 pow_n, UInt32 b, UInt32 m)
static UInt32 update_hash_mask16x2 (UInt32 h, int in, int out, UInt32 pow1, UInt32 pow2, UInt32 b1, UInt32 b2)
static UInt64 update_hash_mask32x2 (UInt64 h, int in, int out, UInt64 pow1, UInt64 pow2, UInt64 b1, UInt64 b2)
size_t bmz_hash_mod (const void *in, size_t in_len, size_t b, size_t m)
size_t bmz_hash_mod16x2 (const void *in, size_t in_len, size_t b1, size_t b2, size_t m1, size_t m2)
size_t bmz_hash_mask16x2 (const void *in, size_t in_len, size_t b1, size_t b2)
size_t bmz_hash_mask (const void *in, size_t in_len, size_t b)
size_t bmz_hash_mask32x2 (const void *in, size_t in_len, size_t b1, size_t b2)
int bmz_check_hash_mod (const void *src, size_t in_len, size_t fp_len, size_t b, size_t m)
void bmz_bench_hash_mod (const void *src, size_t in_len, size_t fp_len)
int bmz_check_hash_mod16x2 (const void *src, size_t in_len, size_t fp_len, size_t b1, size_t b2, size_t m1, size_t m2)
void bmz_bench_hash_mod16x2 (const void *src, size_t in_len, size_t fp_len)
int bmz_check_hash_mask16x2 (const void *src, size_t in_len, size_t fp_len, size_t b1, size_t b2)
void bmz_bench_hash_mask16x2 (const void *src, size_t in_len, size_t fp_len)
int bmz_check_hash_mask (const void *src, size_t in_len, size_t fp_len, size_t b)
void bmz_bench_hash_mask (const void *src, size_t in_len, size_t fp_len)
int bmz_check_hash_mask32x2 (const void *src, size_t in_len, size_t fp_len, size_t b1, size_t b2)
void bmz_bench_hash_mask32x2 (const void *src, size_t in_len, size_t fp_len)
void bmz_bench_hash (const void *in, size_t in_len, unsigned type)
void bmz_bench_lut_mod (const void *src, size_t in_len, size_t fp_len, void *work_mem, size_t b, size_t m)
void bmz_bench_lut_mod16x2 (const void *src, size_t in_len, size_t fp_len, void *work_mem, size_t b1, size_t b2, size_t m1, size_t m2)
void bmz_bench_lut_mask16x2 (const void *src, size_t in_len, size_t fp_len, void *work_mem, size_t b1, size_t b2)
void bmz_bench_lut_mask (const void *src, size_t in_len, size_t fp_len, void *work_mem, size_t b)
void bmz_bench_lut_mask32x2 (const void *src, size_t in_len, size_t fp_len, void *work_mem, size_t b1, size_t b2)
int bmz_bm_pack_mod (const void *src, size_t in_len, void *dst, size_t *out_len_p, size_t offset, size_t fp_len, void *work_mem, size_t b, size_t m)
int bmz_bm_pack_mod16x2 (const void *src, size_t in_len, void *dst, size_t *out_len_p, size_t offset, size_t fp_len, void *work_mem, size_t b1, size_t b2, size_t m1, size_t m2)
int bmz_bm_pack_mask16x2 (const void *src, size_t in_len, void *dst, size_t *out_len_p, size_t offset, size_t fp_len, void *work_mem, size_t b1, size_t b2)
int bmz_bm_pack_mask (const void *src, size_t in_len, void *dst, size_t *out_len_p, size_t offset, size_t fp_len, void *work_mem, size_t b)
int bmz_bm_pack_mask32x2 (const void *src, size_t in_len, void *dst, size_t *out_len_p, size_t offset, size_t fp_len, void *work_mem, size_t b1, size_t b2)
size_t bmz_pack_buflen (size_t in_len)
 Compute bmz compression output buffer length.
size_t bmz_bm_pack_worklen (size_t in_len, size_t fp_len)
static size_t bmz_pack_auxlen (size_t in_len, size_t fp_len)
size_t bmz_pack_worklen (size_t in_len, size_t fp_len)
 Return size of work memory for bmz compression.
size_t bmz_unpack_worklen (size_t out_len)
 Return size of work memory for bmz decompression.
int bmz_pack_mod (const void *in, size_t in_len, void *out, size_t *out_len_p, size_t offset, size_t fp_len, unsigned flags, void *work_mem, size_t b, size_t m)
int bmz_pack_mod16x2 (const void *in, size_t in_len, void *out, size_t *out_len_p, size_t offset, size_t fp_len, unsigned flags, void *work_mem, size_t b1, size_t b2, size_t m1, size_t m2)
int bmz_pack_mask16x2 (const void *in, size_t in_len, void *out, size_t *out_len_p, size_t offset, size_t fp_len, unsigned flags, void *work_mem, size_t b1, size_t b2)
int bmz_pack_mask (const void *in, size_t in_len, void *out, size_t *out_len_p, size_t offset, size_t fp_len, unsigned flags, void *work_mem, size_t b)
int bmz_pack_mask32x2 (const void *in, size_t in_len, void *out, size_t *out_len_p, size_t offset, size_t fp_len, unsigned flags, void *work_mem, size_t b1, size_t b2)
int bmz_pack (const void *in, size_t in_len, void *out, size_t *out_len_p, size_t offset, size_t fp_len, unsigned flags, void *work_mem)
 Perform bmz compression.
int bmz_unpack (const void *in, size_t in_len, void *out, size_t *out_len_p, void *work_mem)
 Perform bmz decompression.
int bmz_bm_unpack (const void *src, size_t in_len, void *dst, size_t *out_len_p)
int bmz_bm_dump (const void *src, size_t in_len)
int bmz_init ()
 Perform bmz initialization only needs to be called once, mostly for sanity checks.
int bmz_lz_pack (const void *in, size_t in_len, void *out, size_t *out_len_p, void *work_mem)
size_t bmz_lz_pack_worklen (size_t in_len)
int bmz_lz_unpack (const void *in, size_t in_len, void *out, size_t *out_len_p)
unsigned bmz_checksum (const void *in, size_t in_len)
 A fast checksum (adler32) function that might be useful.
unsigned bmz_update_checksum (unsigned s, const void *in, size_t in_len)

Variables

static size_t s_primes []
static size_t s_collision_thresh = 0
static int s_verbosity = 0
static BmzOutProc s_out_proc = builtin_out
static BmzDieProc s_die_proc = builtin_die


Define Documentation

#define BM_ABORT_COLLISIONS   ((BM_COLLISION_ABORT_THRESH + 1) * collision_thresh + 1)

Definition at line 71 of file bmz.c.

#define BM_ALIGN ( _mem_,
_n_   )     (Byte *)(_mem_) + _n_ - (((size_t)(_mem_))%(_n_))

Definition at line 93 of file bmz.c.

#define BM_B1   257

Copyright (C) 2007 Luke Lu (Zvents, Inc.

)

This file is part of Hypertable.

Hypertable is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License, or any later version.

Hypertable is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with Hypertable. If not, see <http://www.gnu.org/licenses/> An effective/efficient block compressor for input containing long common strings (e.g. web pages from a website)

cf. Bentley & McIlroy, "Data Compression Using Long Common Strings", 1999 cf. BMDiff & Zippy mentioned in the Bigtable paper

Definition at line 40 of file bmz.c.

#define BM_B2   277

Definition at line 41 of file bmz.c.

#define BM_CHECK ( _cond_   ) 

Value:

if (_cond_); else BM_DIE("%s:%d: expects: %s", \
                             __FILE__, __LINE__, #_cond_)

Definition at line 238 of file bmz.c.

#define BM_COLLISION_ABORT_THRESH   1

Definition at line 70 of file bmz.c.

#define BM_COLOR_ALT   "\x1b[32m"

Definition at line 76 of file bmz.c.

#define BM_COLOR_DBG   "\x1b[31m"

Definition at line 75 of file bmz.c.

#define BM_COLOR_END   "\x1b[m"

Definition at line 77 of file bmz.c.

#define BM_DECODE_LEN ( _n_   )     BM_DECODE_VINT(_n_)

Definition at line 1325 of file bmz.c.

#define BM_DECODE_POS ( _cpos_,
_n_   ) 

Definition at line 1282 of file bmz.c.

#define BM_DECODE_VINT ( _n_   ) 

Value:

do { \
  BM_NEED_IN(1); \
  _n_ = (*ip++ & 0x7f); \
  if (ip[-1] & 0x80) { \
    BM_NEED_IN(1); \
    _n_ |= ((*ip++ & 0x7f) << 7); \
  } else break; \
  if (ip[-1] & 0x80) { \
    BM_NEED_IN(1); \
    _n_ |= ((*ip++ & 0x7f) << 14); \
  } else break; \
  if (ip[-1] & 0x80) { \
    BM_NEED_IN(1); \
    _n_ |= ((*ip++ & 0x7f) << 21); \
  } else break; \
  if (ip[-1] & 0x80) { \
    BM_NEED_IN(1); \
    _n_ |= ((UInt64)(*ip++ & 0x7f) << 28); \
  } else break; \
  if (ip[-1] & 0x80) { \
    BM_NEED_IN(1); \
    _n_ |= ((UInt64)(*ip++ & 0x7f) << 35); \
  } else break; \
  if (ip[-1] & 0x80) { \
    BM_NEED_IN(1); \
    _n_ |= ((UInt64)(*ip++ & 0x7f) << 42); \
  } \
} while (0)

Definition at line 924 of file bmz.c.

#define BM_DIE ( _fmt_,
...   ) 

Value:

do { \
  char msg[256]; \
  snprintf(msg, sizeof(msg), "fatal: %s: " _fmt_ "\n", \
           __FUNCTION__, ##__VA_ARGS__); \
  s_die_proc(msg); \
} while (0)

Definition at line 231 of file bmz.c.

#define BM_ENCODE_BODY ( _init_hash_,
_update_hash_,
_randomize_   ) 

Definition at line 1017 of file bmz.c.

#define BM_ENCODE_DELTA ( _op_,
_ipos_,
_pos_,
_len_   ) 

Value:

do { \
  UInt64 _ipos = _ipos_, _len = _len_; \
  int pos_w = BM_INT_WIDTH(_ipos); \
  int len_w = BM_VINT_WIDTH(_len); \
  BM_NEED_OUT(pos_w + len_w + 1); \
  *_op_++ = BM_ESC; \
  BM_ENCODE_INT(_op_, _pos_, pos_w); \
  BM_ENCODE_VINT(_op_, _len_, len_w); \
} while (0)

Definition at line 968 of file bmz.c.

#define BM_ENCODE_FINAL_OUTPUT ( _op_,
_ip_,
_ip_end_   ) 

Value:

BM_NEED_OUT((_ip_end_) - (_ip_)); /* at least */ \
  while (_ip_ < _ip_end_) { \
    if (*_ip_ == BM_ESC) { \
      BM_NEED_OUT((_ip_end_) - (_ip_) + 1); \
      *_op_++ = BM_ESC; \
    } \
    *_op_++ = *_ip_++; \
  }

Definition at line 815 of file bmz.c.

#define BM_ENCODE_INT ( _op_,
_n_,
_width_   ) 

Definition at line 833 of file bmz.c.

#define BM_ENCODE_OUTPUT ( _op_,
_ip_,
_ip_end_   ) 

Value:

while (_ip_ < _ip_end_) { \
    if (*_ip_ == BM_ESC) { \
      BM_NEED_OUT(1); \
      *_op_++ = BM_ESC; \
    } \
    BM_NEED_OUT(1); \
    *_op_++ = *_ip_++; \
  }

Definition at line 804 of file bmz.c.

#define BM_ENCODE_PASS

Value:

do { \
  *out = 0; \
  memcpy(out + 1, in, in_len); \
  *out_len_p = in_len + 1; \
  BM_ENCODE_STAT("no reduction"); \
  return BMZ_E_OK; \
} while (0)

Definition at line 825 of file bmz.c.

#define BM_ENCODE_STAT ( _msg_   ) 

Value:

do { \
  size_t c = lut.collisions; \
  BM_LOG(1, "%s: in: %llu, out: %llu, lut.collisions: %llu (eff. bits: %.3f) " \
         "thresh: %llu), lut.probes: %llu (%.3f/lu)\n", _msg_, (Llu)in_len, \
         (Llu)*out_len_p, (Llu)c, \
         c ? log((double)nfps * scan_len / c) / log(2) : sizeof(size_t) * 8., \
         (Llu)collision_thresh, (Llu)lut.probes, \
         (double)lut.probes / scan_len); \
} while (0)

Definition at line 1006 of file bmz.c.

#define BM_ENCODE_VINT ( _op_,
_n_,
_width_   ) 

Definition at line 874 of file bmz.c.

#define BM_ESC   0xfe

Definition at line 53 of file bmz.c.

#define BM_HANDLE_COLLISIONS ( _randomize_,
_init_hash_   ) 

Value:

do { \
  size_t adapts = lut.adapts, collisions = ++lut.collisions; \
  \
  if (adapts > BM_COLLISION_ABORT_THRESH) { \
    /* stumped & give up, just pass the bytes */ \
    BM_LOG(1, "too much collisions: %llu, giving up.\n", \
           (Llu)BM_ABORT_COLLISIONS); \
    goto output_overrun; \
  } \
  else if (collisions > collision_thresh) { \
    /* unlikely when the hash is good, so reinitialize hash, in case \
     * data is wacked/adversarial for some reasons. \
     */ \
    BM_LOG(2, "hash collision %llu: %llx: pos: %llu and %llu: [" BM_COLOR_DBG, \
           (Llu)lut.collisions, (Llu)hash, (Llu)pos, (Llu)i); \
    BM_LOG_(2, in + pos, fp_len); \
    BM_LOG(2, "%s", BM_COLOR_END "] vs [" BM_COLOR_ALT); \
    BM_LOG_(2, in + i, fp_len); \
    BM_LOG(2, "%s", BM_COLOR_END "] trying to adapt.\n"); \
    _randomize_; \
    boundary = last_i = i = offset; \
    _init_hash_; \
    init_bmlut(&lut, in_len / fp_len, work_mem); \
    lut.adapts = adapts + 1; \
    op = out + 1; \
    last_ip = in; \
    continue; \
  } \
} while (0)

Definition at line 978 of file bmz.c.

#define BM_HASH_BENCH_BODY ( _init_hash_,
_update_hash_   ) 

Value:

size_t hash; \
  Byte *in = (Byte *)src, *ip = in, *in_end = in + in_len; \
  \
  BM_CHECK(fp_len < in_len); \
  _init_hash_; \
  \
  for (; ip < in_end - fp_len - 1; ++ip) { \
    hash = _update_hash_; \
  } \
  BM_LOG(1, "last hash: %lx\n", (long)hash)

Definition at line 557 of file bmz.c.

#define BM_HASH_CHECK_BODY ( _init_hash_,
_rehash_,
_update_hash_   ) 

Value:

size_t hash; \
  Byte *in = (Byte *)src, *ip = in, *in_end = in + in_len; \
  int errs = 0; \
  \
  BM_CHECK(fp_len < in_len); \
  _init_hash_; \
  \
  for (; ip < in_end - fp_len - 1; ++ip) { \
    size_t h0 = _rehash_; \
    hash = _update_hash_; \
    if (h0 != hash) { \
      BM_LOG(0, "mismatch at pos %ld: %lx vs %lx\n", \
             (long)(ip - in), (long)h0, (long)hash); \
      ++errs; \
    } \
  } \
  BM_LOG(1, "total errors: %d\n", errs); \
  return errs ? BMZ_E_ERROR : BMZ_E_OK

Definition at line 537 of file bmz.c.

#define BM_INT_WIDTH ( _n_   ) 

Value:

(_n_ > BM_MAX_5B ? 6 : \
   (_n_ > BM_MAX_4B ? 5 : \
    (_n_ > BM_MAX_3B ? 4 : \
     (_n_ > BM_MAX_2B ? 3 : \
      (_n_ > BM_MAX_1B ? 2 : 1)))))

Definition at line 953 of file bmz.c.

#define BM_LOG ( _level_,
_fmt_,
...   ) 

Value:

if (s_verbosity >= _level_) do { \
  char msg[256]; \
  int len = snprintf(msg, sizeof(msg), "%s: " _fmt_, __FUNCTION__, \
                     ##__VA_ARGS__); \
  s_out_proc(msg, len); \
} while (0)

Definition at line 220 of file bmz.c.

#define BM_LOG_ ( _level_,
_buf_,
_len_   ) 

Value:

if (s_verbosity >= _level_) do { \
  s_out_proc((char *)_buf_, _len_); \
} while (0)

Definition at line 227 of file bmz.c.

#define BM_LOOKUP ( _lut_,
_hash_,
_entry_   ) 

Value:

do { \
  size_t sz = _lut_.size, idx = (_hash_) & (sz - 1), probes = 0; \
  BmLutEntry *b = _lut_.entries, *p = b + idx, *endp = b + sz; \
  \
  while (p->pos != BM_NPOS) { \
    if (p->hash == _hash_) break; \
    ++p; \
    if (p >= endp) p = b; \
    ++probes; \
  } \
  if (probes) _lut_.probes += probes; \
  _entry_ = p; \
} while (0)

Definition at line 331 of file bmz.c.

#define BM_LUT_BENCH_BODY ( _init_hash_,
_update_hash_   ) 

Value:

size_t hash; \
  Byte *in = (Byte *)src, *ip = in, *in_end = in + in_len; \
  size_t scan_len = in_len - fp_len, c, boundary; \
  BmLut lut; \
  BmLutEntry *entry; \
  \
  BM_CHECK(fp_len < in_len); \
  init_bmlut(&lut, in_len / fp_len, work_mem); \
  _init_hash_; \
  \
  for (boundary = fp_len; ip < in_end - fp_len - 1; ++ip) { \
    size_t pos = ip - in; \
    BM_LOOKUP(lut, hash, entry); \
    \
    if (entry->pos != BM_NPOS) { \
      if (memcmp(in + entry->pos, ip, fp_len) != 0) ++lut.collisions; \
    } \
    if (pos == boundary) { \
      entry->hash = hash; \
      entry->pos = pos; \
      boundary += fp_len; \
    } \
    hash = _update_hash_; \
  } \
  c = lut.collisions; \
  BM_LOG(1, "length: %llu, lut.size: %llu, lut.collisions: %llu (eff. bits: " \
         "%.3f), lut.probes: %llu (%.3f/lu)\n", (Llu)in_len, (Llu)lut.size, \
         (Llu)c, c ? log((double)in_len / fp_len * scan_len / c) / log(2) \
                   : sizeof(size_t) * 8., \
         (Llu)lut.probes, (double)lut.probes / scan_len)

Definition at line 705 of file bmz.c.

#define BM_LZ_MAX ( _len_   )     ((_len_) / 16 + 64 + 3)

Definition at line 1156 of file bmz.c.

#define BM_M1   0xffff

Definition at line 42 of file bmz.c.

#define BM_M2   (0xffff - 4)

Definition at line 43 of file bmz.c.

#define BM_MASK16   0xffff

Definition at line 45 of file bmz.c.

#define BM_MASK24   0xffffff

Definition at line 46 of file bmz.c.

#define BM_MASK32   0xffffffff

Definition at line 47 of file bmz.c.

#define BM_MASKSZ   BM_MASK32

Definition at line 48 of file bmz.c.

#define BM_MAX_1B   0xfd

Definition at line 55 of file bmz.c.

#define BM_MAX_2B   0xfdff

Definition at line 56 of file bmz.c.

#define BM_MAX_3B   0xfdffff

Definition at line 57 of file bmz.c.

#define BM_MAX_4B   0xfdfffffful

Definition at line 58 of file bmz.c.

#define BM_MAX_5B   0xfdffffffffull

Definition at line 59 of file bmz.c.

#define BM_MAX_LEN   0xfdffffffffffull

Definition at line 54 of file bmz.c.

#define BM_MAX_V1B   0x7f

Definition at line 62 of file bmz.c.

#define BM_MAX_V2B   0x3fff

Definition at line 63 of file bmz.c.

#define BM_MAX_V3B   0x1fffff

Definition at line 64 of file bmz.c.

#define BM_MAX_V4B   0xfffffff

Definition at line 65 of file bmz.c.

#define BM_MAX_V5B   0x7ffffffffull

Definition at line 66 of file bmz.c.

#define BM_MAX_V6B   0x3ffffffffffull

Definition at line 67 of file bmz.c.

#define BM_NEED_IN ( _n_   )     if ((int)(in_end - ip) < (int)(_n_)) goto input_overrun

Definition at line 798 of file bmz.c.

#define BM_NEED_OUT ( _n_   )     if ((int)(out_end - op) < (int)(_n_)) goto output_overrun

Definition at line 801 of file bmz.c.

#define BM_NPOS   ((size_t)-1)

Definition at line 90 of file bmz.c.

#define BM_VINT_WIDTH ( _n_   ) 

Value:

(_n_ > BM_MAX_V6B ? 7 : \
   (_n_ > BM_MAX_V5B ? 6 : \
    (_n_ > BM_MAX_V4B ? 5 : \
     (_n_ > BM_MAX_V3B ? 4 : \
      (_n_ > BM_MAX_V2B ? 3 : \
       (_n_ > BM_MAX_V1B ? 2 : 1))))))

Definition at line 960 of file bmz.c.

#define BMZ_PACK_BODY ( _bm_pack_   ) 

Value:

size_t tlen = in_len + 1; \
  Byte *dst = (Byte *)work_mem; \
  Byte *aux_mem = dst + tlen; \
  Byte *work_mem_aligned = BM_ALIGN(aux_mem, 8); \
  int ret; \
  \
  /* overlap flag assume the following memory layout    \
   * |=============== input/output =================|   \
   * |<------------- bmz_pack_buflen -------------->|   \
   * |<---------------- in_len ---------------->|       \
   * |<-- *out_len_p -->|                               \
   */                                                   \
  ret = _bm_pack_; \
  if (ret != BMZ_E_OK) return ret; \
  return bmz_lz_pack(dst, tlen, out, out_len_p, work_mem_aligned)

Definition at line 1191 of file bmz.c.

#define POW_MASK_BODY ( _pow_fun_,
_int_type_   ) 

Value:

if (n == 0) return 1; \
  if (n == 1) return (x & m); \
  { \
    _int_type_ sqr = (x * x) & m; \
    _int_type_ pow = _pow_fun_(sqr, n / 2, m); \
    if (n & 1) return ((pow * x) & m); \
    return (pow & m); \
  }

Definition at line 458 of file bmz.c.

#define POW_MOD_BODY ( _pow_fun_,
_int_type_   ) 

Value:

if (n == 0) return 1; \
  if (n == 1) return x % m; \
  { \
    _int_type_ sqr = (x * x) % m; \
    _int_type_ pow = _pow_fun_(sqr, n / 2, m); \
    if (n & 1) return (pow * x) % m; \
    return pow % m; \
  }

Definition at line 376 of file bmz.c.

#define R_HASH_MASK_BODY ( int_type   ) 

Value:

int_type h = 0, p = 1; \
  \
  while (len--) { \
    h += p * buf[len]; \
    h &= m; \
    p *= b; \
    p &= m; \
  } \
  return h

Definition at line 424 of file bmz.c.

#define R_HASH_MOD_BODY ( _int_type_   ) 

Value:

_int_type_ h = 0, p = 1; \
  \
  while (len--) { \
    h += p * buf[len]; \
    h %= m; \
    p *= b; \
    p %= m; \
  } \
  return h

Definition at line 346 of file bmz.c.

#define UPDATE_HASH_MASK_BODY

Value:

h *= b; \
  h -= (out * pow_n) & m; \
  h += in; \
  return (h & m)

Definition at line 478 of file bmz.c.

#define UPDATE_HASH_MOD_BODY

Value:

h *= b; \
  h -= (out * pow_n) % m; \
  h += in; \
  if (h < 0) h += m; \
  return (h % m)

Definition at line 397 of file bmz.c.


Typedef Documentation

typedef uint8_t Byte

Definition at line 80 of file bmz.c.

typedef int32_t Int32

Definition at line 84 of file bmz.c.

typedef int64_t Int64

Definition at line 83 of file bmz.c.

typedef long long unsigned Llu

Definition at line 87 of file bmz.c.

typedef uint32_t UInt32

Definition at line 81 of file bmz.c.

typedef uint64_t UInt64

Definition at line 82 of file bmz.c.


Function Documentation

static size_t bm_lut_size ( size_t  n  )  [static]

Definition at line 290 of file bmz.c.

void bmz_bench_hash ( const void *  in,
size_t  in_len,
unsigned  type 
)

Definition at line 683 of file bmz.c.

void bmz_bench_hash_mask ( const void *  src,
size_t  in_len,
size_t  fp_len 
)

Definition at line 650 of file bmz.c.

void bmz_bench_hash_mask16x2 ( const void *  src,
size_t  in_len,
size_t  fp_len 
)

Definition at line 628 of file bmz.c.

void bmz_bench_hash_mask32x2 ( const void *  src,
size_t  in_len,
size_t  fp_len 
)

Definition at line 672 of file bmz.c.

void bmz_bench_hash_mod ( const void *  src,
size_t  in_len,
size_t  fp_len 
)

Definition at line 581 of file bmz.c.

void bmz_bench_hash_mod16x2 ( const void *  src,
size_t  in_len,
size_t  fp_len 
)

Definition at line 604 of file bmz.c.

void bmz_bench_lut_mask ( const void *  src,
size_t  in_len,
size_t  fp_len,
void *  work_mem,
size_t  b 
)

Definition at line 773 of file bmz.c.

void bmz_bench_lut_mask16x2 ( const void *  src,
size_t  in_len,
size_t  fp_len,
void *  work_mem,
size_t  b1,
size_t  b2 
)

Definition at line 761 of file bmz.c.

void bmz_bench_lut_mask32x2 ( const void *  src,
size_t  in_len,
size_t  fp_len,
void *  work_mem,
size_t  b1,
size_t  b2 
)

Definition at line 783 of file bmz.c.

void bmz_bench_lut_mod ( const void *  src,
size_t  in_len,
size_t  fp_len,
void *  work_mem,
size_t  b,
size_t  m 
)

Definition at line 738 of file bmz.c.

void bmz_bench_lut_mod16x2 ( const void *  src,
size_t  in_len,
size_t  fp_len,
void *  work_mem,
size_t  b1,
size_t  b2,
size_t  m1,
size_t  m2 
)

Definition at line 748 of file bmz.c.

int bmz_bm_dump ( const void *  src,
size_t  in_len 
)

Definition at line 1386 of file bmz.c.

int bmz_bm_pack_mask ( const void *  src,
size_t  in_len,
void *  dst,
size_t *  out_len_p,
size_t  offset,
size_t  fp_len,
void *  work_mem,
size_t  b 
)

Definition at line 1131 of file bmz.c.

int bmz_bm_pack_mask16x2 ( const void *  src,
size_t  in_len,
void *  dst,
size_t *  out_len_p,
size_t  offset,
size_t  fp_len,
void *  work_mem,
size_t  b1,
size_t  b2 
)

Definition at line 1117 of file bmz.c.

int bmz_bm_pack_mask32x2 ( const void *  src,
size_t  in_len,
void *  dst,
size_t *  out_len_p,
size_t  offset,
size_t  fp_len,
void *  work_mem,
size_t  b1,
size_t  b2 
)

Definition at line 1142 of file bmz.c.

int bmz_bm_pack_mod ( const void *  src,
size_t  in_len,
void *  dst,
size_t *  out_len_p,
size_t  offset,
size_t  fp_len,
void *  work_mem,
size_t  b,
size_t  m 
)

Definition at line 1091 of file bmz.c.

int bmz_bm_pack_mod16x2 ( const void *  src,
size_t  in_len,
void *  dst,
size_t *  out_len_p,
size_t  offset,
size_t  fp_len,
void *  work_mem,
size_t  b1,
size_t  b2,
size_t  m1,
size_t  m2 
)

Definition at line 1102 of file bmz.c.

size_t bmz_bm_pack_worklen ( size_t  in_len,
size_t  fp_len 
)

Definition at line 1167 of file bmz.c.

int bmz_bm_unpack ( const void *  src,
size_t  in_len,
void *  dst,
size_t *  out_len_p 
)

Definition at line 1328 of file bmz.c.

int bmz_check_hash_mask ( const void *  src,
size_t  in_len,
size_t  fp_len,
size_t  b 
)

Definition at line 639 of file bmz.c.

int bmz_check_hash_mask16x2 ( const void *  src,
size_t  in_len,
size_t  fp_len,
size_t  b1,
size_t  b2 
)

Definition at line 615 of file bmz.c.

int bmz_check_hash_mask32x2 ( const void *  src,
size_t  in_len,
size_t  fp_len,
size_t  b1,
size_t  b2 
)

Definition at line 659 of file bmz.c.

int bmz_check_hash_mod ( const void *  src,
size_t  in_len,
size_t  fp_len,
size_t  b,
size_t  m 
)

Definition at line 570 of file bmz.c.

int bmz_check_hash_mod16x2 ( const void *  src,
size_t  in_len,
size_t  fp_len,
size_t  b1,
size_t  b2,
size_t  m1,
size_t  m2 
)

Definition at line 591 of file bmz.c.

unsigned bmz_checksum ( const void *  in,
size_t  in_len 
)

A fast checksum (adler32) function that might be useful.

Parameters:
in - input buffer
in_len - input buffer length in bytes

Definition at line 1463 of file bmz.c.

size_t bmz_hash_mask ( const void *  in,
size_t  in_len,
size_t  b 
)

Definition at line 527 of file bmz.c.

size_t bmz_hash_mask16x2 ( const void *  in,
size_t  in_len,
size_t  b1,
size_t  b2 
)

Definition at line 522 of file bmz.c.

size_t bmz_hash_mask32x2 ( const void *  in,
size_t  in_len,
size_t  b1,
size_t  b2 
)

Definition at line 532 of file bmz.c.

size_t bmz_hash_mod ( const void *  in,
size_t  in_len,
size_t  b,
size_t  m 
)

Definition at line 511 of file bmz.c.

size_t bmz_hash_mod16x2 ( const void *  in,
size_t  in_len,
size_t  b1,
size_t  b2,
size_t  m1,
size_t  m2 
)

Definition at line 516 of file bmz.c.

int bmz_init (  ) 

Perform bmz initialization only needs to be called once, mostly for sanity checks.

Definition at line 1435 of file bmz.c.

int bmz_lz_pack ( const void *  in,
size_t  in_len,
void *  out,
size_t *  out_len_p,
void *  work_mem 
)

Definition at line 1441 of file bmz.c.

size_t bmz_lz_pack_worklen ( size_t  in_len  ) 

Definition at line 1450 of file bmz.c.

int bmz_lz_unpack ( const void *  in,
size_t  in_len,
void *  out,
size_t *  out_len_p 
)

Definition at line 1455 of file bmz.c.

int bmz_pack ( const void *  in,
size_t  in_len,
void *  out,
size_t *  out_len_p,
size_t  offset,
size_t  fp_len,
unsigned  flags,
void *  work_mem 
)

Perform bmz compression.

Parameters:
in - input buffer
in_len - input buffer length in bytes
out - output buffer for compressed data
out_len_p - pointer to the length of output, which specifies the size of the output buffer as input and is set to the length of compressed data on return
offset - starting offset of fingerprints, use 0 if you have to ask
fp_len - fingerprint length, use 50 if you have to ask
flags - compression options. See BMZ_F_* defines.
work_mem - pointer to work memory buffer, cf. bmz_pack_worklen
Returns:
error code

Definition at line 1244 of file bmz.c.

static size_t bmz_pack_auxlen ( size_t  in_len,
size_t  fp_len 
) [static]

Definition at line 1173 of file bmz.c.

size_t bmz_pack_buflen ( size_t  in_len  ) 

Compute bmz compression output buffer length.

Parameters:
in_len - input buffer length in bytes
Returns:
output length in bytes

Definition at line 1159 of file bmz.c.

int bmz_pack_mask ( const void *  in,
size_t  in_len,
void *  out,
size_t *  out_len_p,
size_t  offset,
size_t  fp_len,
unsigned  flags,
void *  work_mem,
size_t  b 
)

Definition at line 1228 of file bmz.c.

int bmz_pack_mask16x2 ( const void *  in,
size_t  in_len,
void *  out,
size_t *  out_len_p,
size_t  offset,
size_t  fp_len,
unsigned  flags,
void *  work_mem,
size_t  b1,
size_t  b2 
)

Definition at line 1220 of file bmz.c.

int bmz_pack_mask32x2 ( const void *  in,
size_t  in_len,
void *  out,
size_t *  out_len_p,
size_t  offset,
size_t  fp_len,
unsigned  flags,
void *  work_mem,
size_t  b1,
size_t  b2 
)

Definition at line 1236 of file bmz.c.

int bmz_pack_mod ( const void *  in,
size_t  in_len,
void *  out,
size_t *  out_len_p,
size_t  offset,
size_t  fp_len,
unsigned  flags,
void *  work_mem,
size_t  b,
size_t  m 
)

Definition at line 1204 of file bmz.c.

int bmz_pack_mod16x2 ( const void *  in,
size_t  in_len,
void *  out,
size_t *  out_len_p,
size_t  offset,
size_t  fp_len,
unsigned  flags,
void *  work_mem,
size_t  b1,
size_t  b2,
size_t  m1,
size_t  m2 
)

Definition at line 1212 of file bmz.c.

size_t bmz_pack_worklen ( size_t  in_len,
size_t  fp_len 
)

Return size of work memory for bmz compression.

So we don't have to allocate memory per invocation when the input buffer size is fixed.

Parameters:
in_len - input buffer length in bytes
fp_len - fingerprint length in bytes
Returns:
length in bytes

Definition at line 1181 of file bmz.c.

int bmz_set_collision_thresh ( int  thresh  ) 

Definition at line 257 of file bmz.c.

BmzOutProc bmz_set_out_proc ( BmzOutProc  proc  ) 

Set messaging/logging procedure.

Parameters:
proc - pointer to a message function
Returns:
old proc

Definition at line 250 of file bmz.c.

int bmz_set_verbosity ( int  verbosity  ) 

Set the verbosity of library for testing and debugging.

Parameters:
verbosity - 0, 1, 2
Returns:
old verbosity

Definition at line 243 of file bmz.c.

int bmz_unpack ( const void *  in,
size_t  in_len,
void *  out,
size_t *  out_len_p,
void *  work_mem 
)

Perform bmz decompression.

Parameters:
in - input buffer (compressed)
in_len - input buffer length in bytes
out - output buffer
out_len_p - pointer to the length of output, which specifies the size of the output buffer and is set to length of uncompressed data on return;
work_mem - pointer to work memory buffer, cf. bmz_unpack_worklen
Returns:
error code

Definition at line 1270 of file bmz.c.

size_t bmz_unpack_worklen ( size_t  out_len  ) 

Return size of work memory for bmz decompression.

Parameters:
out_len - work memory length in bytes
Returns:
length in bytes

Definition at line 1187 of file bmz.c.

unsigned bmz_update_checksum ( unsigned  s,
const void *  in,
size_t  in_len 
)

Definition at line 1468 of file bmz.c.

static HT_NORETURN void builtin_die ( const char *  msg  )  [static]

Definition at line 212 of file bmz.c.

static void builtin_out ( const char *  buf,
size_t  len 
) [static]

Definition at line 207 of file bmz.c.

static void init_bmlut ( BmLut table,
size_t  n,
void *  work_mem 
) [static]

Definition at line 317 of file bmz.c.

static UInt64 pow_mask ( UInt64  x,
UInt64  n,
UInt64  m 
) [static]

Definition at line 469 of file bmz.c.

static UInt32 pow_mask32 ( UInt32  x,
UInt32  n,
UInt32  m 
) [static]

Definition at line 474 of file bmz.c.

static UInt64 pow_mod ( UInt64  x,
UInt64  n,
UInt64  m 
) [static]

Definition at line 387 of file bmz.c.

static UInt32 pow_mod32 ( UInt32  x,
UInt32  n,
UInt32  m 
) [static]

Definition at line 392 of file bmz.c.

static UInt64 r_hash_mask ( const Byte buf,
size_t  len,
UInt64  b,
UInt64  m 
) [static]

Definition at line 436 of file bmz.c.

static UInt32 r_hash_mask16x2 ( const Byte buf,
size_t  len,
UInt32  b1,
UInt32  b2 
) [static]

Definition at line 447 of file bmz.c.

static UInt32 r_hash_mask32 ( const Byte buf,
size_t  len,
UInt32  b,
UInt32  m 
) [static]

Definition at line 441 of file bmz.c.

static UInt64 r_hash_mask32x2 ( const Byte buf,
size_t  len,
UInt64  b1,
UInt64  b2 
) [static]

Definition at line 453 of file bmz.c.

static UInt64 r_hash_mod ( const Byte buf,
size_t  len,
size_t  b,
UInt64  m 
) [static]

Definition at line 358 of file bmz.c.

static UInt32 r_hash_mod16x2 ( const Byte buf,
size_t  len,
UInt32  b1,
UInt32  b2,
UInt32  m1,
UInt32  m2 
) [static]

Definition at line 369 of file bmz.c.

static UInt32 r_hash_mod32 ( const Byte buf,
size_t  len,
UInt32  b,
UInt32  m 
) [static]

Definition at line 363 of file bmz.c.

static size_t random_prime ( size_t  excluded  )  [static]

Definition at line 267 of file bmz.c.

static UInt64 update_hash_mask ( UInt64  h,
int  in,
int  out,
UInt64  pow_n,
UInt64  b,
UInt64  m 
) [inline, static]

Definition at line 485 of file bmz.c.

static UInt32 update_hash_mask16x2 ( UInt32  h,
int  in,
int  out,
UInt32  pow1,
UInt32  pow2,
UInt32  b1,
UInt32  b2 
) [inline, static]

Definition at line 496 of file bmz.c.

static UInt32 update_hash_mask32 ( UInt32  h,
int  in,
int  out,
UInt32  pow_n,
UInt32  b,
UInt32  m 
) [inline, static]

Definition at line 490 of file bmz.c.

static UInt64 update_hash_mask32x2 ( UInt64  h,
int  in,
int  out,
UInt64  pow1,
UInt64  pow2,
UInt64  b1,
UInt64  b2 
) [inline, static]

Definition at line 503 of file bmz.c.

static Int64 update_hash_mod ( Int64  h,
Byte  in,
Byte  out,
Int64  pow_n,
Int64  b,
Int64  m 
) [inline, static]

Definition at line 405 of file bmz.c.

static UInt32 update_hash_mod16x2 ( UInt32  h,
int  in,
int  out,
UInt32  pow1,
UInt32  pow2,
UInt32  b1,
UInt32  b2,
UInt32  m1,
UInt32  m2 
) [inline, static]

Definition at line 415 of file bmz.c.

static Int32 update_hash_mod32 ( Int32  h,
Byte  in,
Byte  out,
Int32  pow_n,
Int32  b,
Int32  m 
) [inline, static]

Definition at line 410 of file bmz.c.


Variable Documentation

size_t s_collision_thresh = 0 [static]

Definition at line 201 of file bmz.c.

BmzDieProc s_die_proc = builtin_die [static]

Definition at line 218 of file bmz.c.

BmzOutProc s_out_proc = builtin_out [static]

Definition at line 217 of file bmz.c.

size_t s_primes[] [static]

Definition at line 96 of file bmz.c.

int s_verbosity = 0 [static]

Definition at line 204 of file bmz.c.


Generated on Sat Aug 15 08:56:10 2009 for hypertable by  doxygen 1.5.9