bmz.c

Go to the documentation of this file.
00001 
00028 /* Avoid platform specific stuff in this file */
00029 #include <stdio.h>
00030 #include <stdlib.h>
00031 #include <stdint.h>
00032 #include <string.h>
00033 #include <math.h>
00034 #include <time.h>
00035 
00036 #include "bmz-internal.h"
00037 #include "../lzo/minilzo.h"
00038 
00039 /* Initial bases for computing Rabin Karp rolling hash */
00040 #define BM_B1 257
00041 #define BM_B2 277
00042 #define BM_M1 0xffff
00043 #define BM_M2 (0xffff - 4)
00044 
00045 #define BM_MASK16 0xffff
00046 #define BM_MASK24 0xffffff
00047 #define BM_MASK32 0xffffffff
00048 #define BM_MASKSZ BM_MASK32
00049 
00050 /* Escape character
00051  * Don't change without understanding BM_DECODE_POS
00052  */
00053 #define BM_ESC 0xfe
00054 #define BM_MAX_LEN 0xfdffffffffffull /* 253TB block should be big enough */
00055 #define BM_MAX_1B  0xfd
00056 #define BM_MAX_2B  0xfdff
00057 #define BM_MAX_3B  0xfdffff
00058 #define BM_MAX_4B  0xfdfffffful
00059 #define BM_MAX_5B  0xfdffffffffull
00060 
00061 /* VInt limits */
00062 #define BM_MAX_V1B 0x7f
00063 #define BM_MAX_V2B 0x3fff
00064 #define BM_MAX_V3B 0x1fffff
00065 #define BM_MAX_V4B 0xfffffff
00066 #define BM_MAX_V5B 0x7ffffffffull
00067 #define BM_MAX_V6B 0x3ffffffffffull
00068 
00069 /* Rolling hash collision thresholds */
00070 #define BM_COLLISION_ABORT_THRESH 1
00071 #define BM_ABORT_COLLISIONS \
00072   ((BM_COLLISION_ABORT_THRESH + 1) * collision_thresh + 1)
00073 
00074 /* Some colors for debugging/dump output */
00075 #define BM_COLOR_DBG "\x1b[31m"         /* red */
00076 #define BM_COLOR_ALT "\x1b[32m"         /* green */
00077 #define BM_COLOR_END "\x1b[m"
00078 
00079 /* May need to do something here, in case stdint.h is not available */
00080 typedef uint8_t Byte;
00081 typedef uint32_t UInt32;
00082 typedef uint64_t UInt64;
00083 typedef int64_t Int64;
00084 typedef int32_t Int32;
00085 
00086 /* For printf %llu in case some system has funky headers */
00087 typedef long long unsigned Llu;
00088 
00089 /* For lookup table */
00090 #define BM_NPOS ((size_t)-1)
00091 
00092 /* Aligning memory for work space */
00093 #define BM_ALIGN(_mem_, _n_) (Byte *)(_mem_) + _n_ - (((size_t)(_mem_))%(_n_))
00094 
00095 /* Some prime numbers as candidates for RK hash bases in case of adaptation */
00096 static size_t s_primes[] = {
00097            3,    5,    7,   11,   13,   17,   19,   23,   29,
00098     31,   37,   41,   43,   47,   53,   59,   61,   67,   71,
00099     73,   79,   83,   89,   97,  101,  103,  107,  109,  113,
00100    127,  131,  137,  139,  149,  151,  157,  163,  167,  173,
00101    179,  181,  191,  193,  197,  199,  211,  223,  227,  229,
00102    233,  239,  241,  251,  257,  263,  269,  271,  277,  281,
00103    283,  293,  307,  311,  313,  317,  331,  337,  347,  349,
00104    353,  359,  367,  373,  379,  383,  389,  397,  401,  409,
00105    419,  421,  431,  433,  439,  443,  449,  457,  461,  463,
00106    467,  479,  487,  491,  499,  503,  509,  521,  523,  541,
00107    547,  557,  563,  569,  571,  577,  587,  593,  599,  601,
00108    607,  613,  617,  619,  631,  641,  643,  647,  653,  659,
00109    661,  673,  677,  683,  691,  701,  709,  719,  727,  733,
00110    739,  743,  751,  757,  761,  769,  773,  787,  797,  809,
00111    811,  821,  823,  827,  829,  839,  853,  857,  859,  863,
00112    877,  881,  883,  887,  907,  911,  919,  929,  937,  941,
00113    947,  953,  967,  971,  977,  983,  991,  997, 1009, 1013,
00114   1019, 1021, 1031, 1033, 1039, 1049, 1051, 1061, 1063, 1069,
00115   1087, 1091, 1093, 1097, 1103, 1109, 1117, 1123, 1129, 1151,
00116   1153, 1163, 1171, 1181, 1187, 1193, 1201, 1213, 1217, 1223,
00117   1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283, 1289, 1291,
00118   1297, 1301, 1303, 1307, 1319, 1321, 1327, 1361, 1367, 1373,
00119   1381, 1399, 1409, 1423, 1427, 1429, 1433, 1439, 1447, 1451,
00120   1453, 1459, 1471, 1481, 1483, 1487, 1489, 1493, 1499, 1511,
00121   1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571, 1579, 1583,
00122   1597, 1601, 1607, 1609, 1613, 1619, 1621, 1627, 1637, 1657,
00123   1663, 1667, 1669, 1693, 1697, 1699, 1709, 1721, 1723, 1733,
00124   1741, 1747, 1753, 1759, 1777, 1783, 1787, 1789, 1801, 1811,
00125   1823, 1831, 1847, 1861, 1867, 1871, 1873, 1877, 1879, 1889,
00126   1901, 1907, 1913, 1931, 1933, 1949, 1951, 1973, 1979, 1987,
00127   1993, 1997, 1999, 2003, 2011, 2017, 2027, 2029, 2039, 2053,
00128   2063, 2069, 2081, 2083, 2087, 2089, 2099, 2111, 2113, 2129,
00129   2131, 2137, 2141, 2143, 2153, 2161, 2179, 2203, 2207, 2213,
00130   2221, 2237, 2239, 2243, 2251, 2267, 2269, 2273, 2281, 2287,
00131   2293, 2297, 2309, 2311, 2333, 2339, 2341, 2347, 2351, 2357,
00132   2371, 2377, 2381, 2383, 2389, 2393, 2399, 2411, 2417, 2423,
00133   2437, 2441, 2447, 2459, 2467, 2473, 2477, 2503, 2521, 2531,
00134   2539, 2543, 2549, 2551, 2557, 2579, 2591, 2593, 2609, 2617,
00135   2621, 2633, 2647, 2657, 2659, 2663, 2671, 2677, 2683, 2687,
00136   2689, 2693, 2699, 2707, 2711, 2713, 2719, 2729, 2731, 2741,
00137   2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801, 2803, 2819,
00138   2833, 2837, 2843, 2851, 2857, 2861, 2879, 2887, 2897, 2903,
00139   2909, 2917, 2927, 2939, 2953, 2957, 2963, 2969, 2971, 2999,
00140   3001, 3011, 3019, 3023, 3037, 3041, 3049, 3061, 3067, 3079,
00141   3083, 3089, 3109, 3119, 3121, 3137, 3163, 3167, 3169, 3181,
00142   3187, 3191, 3203, 3209, 3217, 3221, 3229, 3251, 3253, 3257,
00143   3259, 3271, 3299, 3301, 3307, 3313, 3319, 3323, 3329, 3331,
00144   3343, 3347, 3359, 3361, 3371, 3373, 3389, 3391, 3407, 3413,
00145   3433, 3449, 3457, 3461, 3463, 3467, 3469, 3491, 3499, 3511,
00146   3517, 3527, 3529, 3533, 3539, 3541, 3547, 3557, 3559, 3571,
00147   3581, 3583, 3593, 3607, 3613, 3617, 3623, 3631, 3637, 3643,
00148   3659, 3671, 3673, 3677, 3691, 3697, 3701, 3709, 3719, 3727,
00149   3733, 3739, 3761, 3767, 3769, 3779, 3793, 3797, 3803, 3821,
00150   3823, 3833, 3847, 3851, 3853, 3863, 3877, 3881, 3889, 3907,
00151   3911, 3917, 3919, 3923, 3929, 3931, 3943, 3947, 3967, 3989,
00152   4001, 4003, 4007, 4013, 4019, 4021, 4027, 4049, 4051, 4057,
00153   4073, 4079, 4091, 4093, 4099, 4111, 4127, 4129, 4133, 4139,
00154   4153, 4157, 4159, 4177, 4201, 4211, 4217, 4219, 4229, 4231,
00155   4241, 4243, 4253, 4259, 4261, 4271, 4273, 4283, 4289, 4297,
00156   4327, 4337, 4339, 4349, 4357, 4363, 4373, 4391, 4397, 4409,
00157   4421, 4423, 4441, 4447, 4451, 4457, 4463, 4481, 4483, 4493,
00158   4507, 4513, 4517, 4519, 4523, 4547, 4549, 4561, 4567, 4583,
00159   4591, 4597, 4603, 4621, 4637, 4639, 4643, 4649, 4651, 4657,
00160   4663, 4673, 4679, 4691, 4703, 4721, 4723, 4729, 4733, 4751,
00161   4759, 4783, 4787, 4789, 4793, 4799, 4801, 4813, 4817, 4831,
00162   4861, 4871, 4877, 4889, 4903, 4909, 4919, 4931, 4933, 4937,
00163   4943, 4951, 4957, 4967, 4969, 4973, 4987, 4993, 4999, 5003,
00164   5009, 5011, 5021, 5023, 5039, 5051, 5059, 5077, 5081, 5087,
00165   5099, 5101, 5107, 5113, 5119, 5147, 5153, 5167, 5171, 5179,
00166   5189, 5197, 5209, 5227, 5231, 5233, 5237, 5261, 5273, 5279,
00167   5281, 5297, 5303, 5309, 5323, 5333, 5347, 5351, 5381, 5387,
00168   5393, 5399, 5407, 5413, 5417, 5419, 5431, 5437, 5441, 5443,
00169   5449, 5471, 5477, 5479, 5483, 5501, 5503, 5507, 5519, 5521,
00170   5527, 5531, 5557, 5563, 5569, 5573, 5581, 5591, 5623, 5639,
00171   5641, 5647, 5651, 5653, 5657, 5659, 5669, 5683, 5689, 5693,
00172   5701, 5711, 5717, 5737, 5741, 5743, 5749, 5779, 5783, 5791,
00173   5801, 5807, 5813, 5821, 5827, 5839, 5843, 5849, 5851, 5857,
00174   5861, 5867, 5869, 5879, 5881, 5897, 5903, 5923, 5927, 5939,
00175   5953, 5981, 5987, 6007, 6011, 6029, 6037, 6043, 6047, 6053,
00176   6067, 6073, 6079, 6089, 6091, 6101, 6113, 6121, 6131, 6133,
00177   6143, 6151, 6163, 6173, 6197, 6199, 6203, 6211, 6217, 6221,
00178   6229, 6247, 6257, 6263, 6269, 6271, 6277, 6287, 6299, 6301,
00179   6311, 6317, 6323, 6329, 6337, 6343, 6353, 6359, 6361, 6367,
00180   6373, 6379, 6389, 6397, 6421, 6427, 6449, 6451, 6469, 6473,
00181   6481, 6491, 6521, 6529, 6547, 6551, 6553, 6563, 6569, 6571,
00182   6577, 6581, 6599, 6607, 6619, 6637, 6653, 6659, 6661, 6673,
00183   6679, 6689, 6691, 6701, 6703, 6709, 6719, 6733, 6737, 6761,
00184   6763, 6779, 6781, 6791, 6793, 6803, 6823, 6827, 6829, 6833,
00185   6841, 6857, 6863, 6869, 6871, 6883, 6899, 6907, 6911, 6917,
00186   6947, 6949, 6959, 6961, 6967, 6971, 6977, 6983, 6991, 6997,
00187   7001, 7013, 7019, 7027, 7039, 7043, 7057, 7069, 7079, 7103,
00188   7109, 7121, 7127, 7129, 7151, 7159, 7177, 7187, 7193, 7207,
00189   7211, 7213, 7219, 7229, 7237, 7243, 7247, 7253, 7283, 7297,
00190   7307, 7309, 7321, 7331, 7333, 7349, 7351, 7369, 7393, 7411,
00191   7417, 7433, 7451, 7457, 7459, 7477, 7481, 7487, 7489, 7499,
00192   7507, 7517, 7523, 7529, 7537, 7541, 7547, 7549, 7559, 7561,
00193   7573, 7577, 7583, 7589, 7591, 7603, 7607, 7621, 7639, 7643,
00194   7649, 7669, 7673, 7681, 7687, 7691, 7699, 7703, 7717, 7723,
00195   7727, 7741, 7753, 7757, 7759, 7789, 7793, 7817, 7823, 7829,
00196   7841, 7853, 7867, 7873, 7877, 7879, 7883, 7901, 7907, 7919,
00197   7927, 7933, 7937, 7949, 7951, 7963, 7993, 8009, 8011, 8017
00198 };
00199 
00200 /* Hash collision adaption threshold */
00201 static size_t s_collision_thresh = 0;
00202 
00203 /* Logging/messaging facilities */
00204 static int s_verbosity = 0;
00205 
00206 static void
00207 builtin_out(const char *buf, size_t len) {
00208   fwrite(buf, 1, len, stderr);
00209 }
00210 
00211 static HT_NORETURN void
00212 builtin_die(const char *msg) {
00213   fputs(msg, stderr);
00214   exit(1);
00215 }
00216 
00217 static BmzOutProc s_out_proc = builtin_out;
00218 static BmzDieProc s_die_proc = builtin_die;
00219 
00220 #define BM_LOG(_level_, _fmt_, ...) if (s_verbosity >= _level_) do { \
00221   char msg[256]; \
00222   int len = snprintf(msg, sizeof(msg), "%s: " _fmt_, __FUNCTION__, \
00223                      ##__VA_ARGS__); \
00224   s_out_proc(msg, len); \
00225 } while (0)
00226 
00227 #define BM_LOG_(_level_, _buf_, _len_) if (s_verbosity >= _level_) do { \
00228   s_out_proc((char *)_buf_, _len_); \
00229 } while (0)
00230 
00231 #define BM_DIE(_fmt_, ...) do { \
00232   char msg[256]; \
00233   snprintf(msg, sizeof(msg), "fatal: %s: " _fmt_ "\n", \
00234            __FUNCTION__, ##__VA_ARGS__); \
00235   s_die_proc(msg); \
00236 } while (0)
00237 
00238 #define BM_CHECK(_cond_) \
00239     if (_cond_); else BM_DIE("%s:%d: expects: %s", \
00240                              __FILE__, __LINE__, #_cond_)
00241 
00242 int
00243 bmz_set_verbosity(int verbosity) {
00244   int old = s_verbosity;
00245   s_verbosity = verbosity;
00246   return old;
00247 }
00248 
00249 BmzOutProc
00250 bmz_set_out_proc(BmzOutProc proc) {
00251   BmzOutProc old = s_out_proc;
00252   s_out_proc = proc;
00253   return old;
00254 }
00255 
00256 int
00257 bmz_set_collision_thresh(int thresh) {
00258   int old = s_collision_thresh;
00259   s_collision_thresh = thresh;
00260   return old;
00261 }
00262 
00263 /* Pick one or two primes out of 1009
00264  * used for adaptive hashing in face of bad data
00265  */
00266 static size_t
00267 random_prime(size_t excluded) {
00268   const size_t n = sizeof(s_primes) / sizeof(size_t);
00269   size_t i = 0;
00270    /* Don't really care if rand/srand are not thread-safe for
00271     * predictable results, as long as it doesn't crash.
00272     */
00273   static int sranded = 0;
00274 
00275   if (!sranded) {
00276     srand(time(NULL));
00277     sranded = 1;
00278   }
00279 
00280   for (; i < n; ++i) {
00281     /* random() in os x and bsd is better, but not in glibc yet. */
00282     size_t s = s_primes[rand() % n];
00283     if (s != excluded) return s;
00284   }
00285   /* Safety net, though highly unlikely (1/1009^1009) to reach here */
00286   return 8111;
00287 }
00288 
00289 static size_t
00290 bm_lut_size(size_t n) {
00291   UInt64 x = n; /* size_t could be 64-bit */
00292 
00293   do {
00294     /* power of 2 for mask mod */
00295     x |= (x >> 1);
00296     x |= (x >> 2);
00297     x |= (x >> 4);
00298     x |= (x >> 8);
00299     x |= (x >> 16);
00300     x |= (x >> 32);
00301     ++x;
00302   } while (x < n * 8 / 5); /* avoid too much probes */
00303   return x;
00304 }
00305 
00306 typedef struct {
00307   size_t hash;
00308   size_t pos;
00309 } BmLutEntry;
00310 
00311 typedef struct {
00312   size_t size, collisions, adapts, probes;
00313   BmLutEntry *entries;
00314 } BmLut;
00315 
00316 static void
00317 init_bmlut(BmLut *table, size_t n, void *work_mem) {
00318   BmLutEntry *p, *endp;
00319 
00320   table->probes = table->adapts = table->collisions = 0;
00321   table->size = bm_lut_size(n);
00322   table->entries = (BmLutEntry *) work_mem;
00323 
00324   for (p = table->entries, endp = p + table->size; p < endp; ++p)
00325     p->pos = BM_NPOS;
00326 
00327   BM_LOG(1, "n(fps): %llu, size: %llu\n", (Llu)n, (Llu)table->size);
00328 }
00329 
00330 /* Simple hash lookup with probing */
00331 #define BM_LOOKUP(_lut_, _hash_, _entry_) do { \
00332   size_t sz = _lut_.size, idx = (_hash_) & (sz - 1), probes = 0; \
00333   BmLutEntry *b = _lut_.entries, *p = b + idx, *endp = b + sz; \
00334   \
00335   while (p->pos != BM_NPOS) { \
00336     if (p->hash == _hash_) break; \
00337     ++p; \
00338     if (p >= endp) p = b; \
00339     ++probes; \
00340   } \
00341   if (probes) _lut_.probes += probes; \
00342   _entry_ = p; \
00343 } while (0)
00344 
00345 /* Classical/generic Rabin Karp */
00346 #define R_HASH_MOD_BODY(_int_type_) \
00347   _int_type_ h = 0, p = 1; \
00348   \
00349   while (len--) { \
00350     h += p * buf[len]; \
00351     h %= m; \
00352     p *= b; \
00353     p %= m; \
00354   } \
00355   return h
00356 
00357 static UInt64
00358 r_hash_mod(const Byte *buf, size_t len, size_t b, UInt64 m) {
00359   R_HASH_MOD_BODY(UInt64);
00360 }
00361 
00362 static UInt32
00363 r_hash_mod32(const Byte *buf, size_t len, UInt32 b, UInt32 m) {
00364   R_HASH_MOD_BODY(UInt32);
00365 }
00366 
00367 /* D* rolling hash family (Andrew Trigdell's thesis p72-73) */
00368 static UInt32
00369 r_hash_mod16x2(const Byte *buf, size_t len, UInt32 b1, UInt32 b2,
00370                UInt32 m1, UInt32 m2) {
00371   return (r_hash_mod32(buf, len, b1, m1) << 16) |
00372          r_hash_mod32(buf, len, b2, m2);
00373 }
00374 
00375 /* Compute (x^n) % m */
00376 #define POW_MOD_BODY(_pow_fun_, _int_type_) \
00377   if (n == 0) return 1; \
00378   if (n == 1) return x % m; \
00379   { \
00380     _int_type_ sqr = (x * x) % m; \
00381     _int_type_ pow = _pow_fun_(sqr, n / 2, m); \
00382     if (n & 1) return (pow * x) % m; \
00383     return pow % m; \
00384   }
00385 
00386 static UInt64
00387 pow_mod(UInt64 x, UInt64 n, UInt64 m) {
00388   POW_MOD_BODY(pow_mod, UInt64)
00389 }
00390 
00391 static UInt32
00392 pow_mod32(UInt32 x, UInt32 n, UInt32 m) {
00393   POW_MOD_BODY(pow_mod32, UInt32)
00394 }
00395 
00396 /* update rolling hash */
00397 #define UPDATE_HASH_MOD_BODY \
00398   h *= b; \
00399   h -= (out * pow_n) % m; \
00400   h += in; \
00401   if (h < 0) h += m; \
00402   return (h % m)
00403 
00404 static inline Int64
00405 update_hash_mod(Int64 h, Byte in, Byte out, Int64 pow_n, Int64 b, Int64 m) {
00406   UPDATE_HASH_MOD_BODY;
00407 }
00408 
00409 static inline Int32
00410 update_hash_mod32(Int32 h, Byte in, Byte out, Int32 pow_n, Int32 b, Int32 m) {
00411   UPDATE_HASH_MOD_BODY;
00412 }
00413 
00414 static inline UInt32
00415 update_hash_mod16x2(UInt32 h, int in, int out, UInt32 pow1, UInt32 pow2,
00416                     UInt32 b1, UInt32 b2, UInt32 m1, UInt32 m2) {
00417   return (update_hash_mod32((h >> 16), in, out, pow1, b1, m1) << 16) |
00418          update_hash_mod32((h & BM_MASK16), in, out, pow2, b2, m2);
00419 }
00420 
00421 /* Faster hash using mask instead of mod
00422  * m needs to be power-of-2
00423  */
00424 #define R_HASH_MASK_BODY(int_type) \
00425   int_type h = 0, p = 1; \
00426   \
00427   while (len--) { \
00428     h += p * buf[len]; \
00429     h &= m; \
00430     p *= b; \
00431     p &= m; \
00432   } \
00433   return h
00434 
00435 static UInt64
00436 r_hash_mask(const Byte *buf, size_t len, UInt64 b, UInt64 m) {
00437   R_HASH_MASK_BODY(UInt64);
00438 }
00439 
00440 static UInt32
00441 r_hash_mask32(const Byte *buf, size_t len, UInt32 b, UInt32 m) {
00442   R_HASH_MASK_BODY(UInt32);
00443 }
00444 
00445 /* C* rolling hash family (see D* above) */
00446 static UInt32
00447 r_hash_mask16x2(const Byte *buf, size_t len, UInt32 b1, UInt32 b2) {
00448   return (r_hash_mask32(buf, len, b1, BM_MASK16) << 16) |
00449          r_hash_mask32(buf, len, b2, BM_MASK16);
00450 }
00451 
00452 static UInt64
00453 r_hash_mask32x2(const Byte *buf, size_t len, UInt64 b1, UInt64 b2) {
00454   return (r_hash_mask(buf, len, b1, BM_MASK32) << 32) |
00455          r_hash_mask(buf, len, b2, BM_MASK32);
00456 }
00457 
00458 #define POW_MASK_BODY(_pow_fun_, _int_type_) \
00459   if (n == 0) return 1; \
00460   if (n == 1) return (x & m); \
00461   { \
00462     _int_type_ sqr = (x * x) & m; \
00463     _int_type_ pow = _pow_fun_(sqr, n / 2, m); \
00464     if (n & 1) return ((pow * x) & m); \
00465     return (pow & m); \
00466   }
00467 
00468 static UInt64
00469 pow_mask(UInt64 x, UInt64 n, UInt64 m) {
00470   POW_MASK_BODY(pow_mask, UInt64)
00471 }
00472 
00473 static UInt32
00474 pow_mask32(UInt32 x, UInt32 n, UInt32 m) {
00475   POW_MASK_BODY(pow_mask32, UInt32)
00476 }
00477 
00478 #define UPDATE_HASH_MASK_BODY \
00479   h *= b; \
00480   h -= (out * pow_n) & m; \
00481   h += in; \
00482   return (h & m)
00483 
00484 static inline UInt64
00485 update_hash_mask(UInt64 h, int in, int out, UInt64 pow_n, UInt64 b, UInt64 m) {
00486   UPDATE_HASH_MASK_BODY;
00487 }
00488 
00489 static inline UInt32
00490 update_hash_mask32(UInt32 h, int in, int out, UInt32 pow_n,
00491                    UInt32 b, UInt32 m) {
00492   UPDATE_HASH_MASK_BODY;
00493 }
00494 
00495 static inline UInt32
00496 update_hash_mask16x2(UInt32 h, int in, int out, UInt32 pow1, UInt32 pow2,
00497                      UInt32 b1, UInt32 b2) {
00498   return (update_hash_mask32((h >> 16), in, out, pow1, b1, BM_MASK16) << 16) |
00499          update_hash_mask32((h & BM_MASK16), in, out, pow2, b2, BM_MASK16);
00500 }
00501 
00502 static inline UInt64
00503 update_hash_mask32x2(UInt64 h, int in, int out, UInt64 pow1, UInt64 pow2,
00504                      UInt64 b1, UInt64 b2) {
00505   return (update_hash_mask((h >> 32), in, out, pow1, b1, BM_MASK32) << 32) |
00506          update_hash_mask((h & BM_MASK32), in, out, pow2, b2, BM_MASK32);
00507 }
00508 
00509 /* External AP Ifor computing the hashes */
00510 size_t
00511 bmz_hash_mod(const void *in, size_t in_len, size_t b, size_t m) {
00512   return r_hash_mod((Byte *)in, in_len, b, m);
00513 }
00514 
00515 size_t
00516 bmz_hash_mod16x2(const void *in, size_t in_len, size_t b1, size_t b2,
00517                  size_t m1, size_t m2) {
00518   return r_hash_mod16x2((Byte *)in, in_len, b1, b2, m1, m2);
00519 }
00520 
00521 size_t
00522 bmz_hash_mask16x2(const void *in, size_t in_len, size_t b1, size_t b2) {
00523   return r_hash_mask16x2((Byte *)in, in_len, b1, b2);
00524 }
00525 
00526 size_t
00527 bmz_hash_mask(const void *in, size_t in_len, size_t b) {
00528   return r_hash_mask((Byte *)in, in_len, b, BM_MASKSZ);
00529 }
00530 
00531 size_t
00532 bmz_hash_mask32x2(const void *in, size_t in_len, size_t b1, size_t b2) {
00533   return r_hash_mask32x2((Byte *)in, in_len, b1, b2);
00534 }
00535 
00536 /* Verify the rolling hashes */
00537 #define BM_HASH_CHECK_BODY(_init_hash_, _rehash_, _update_hash_) \
00538   size_t hash; \
00539   Byte *in = (Byte *)src, *ip = in, *in_end = in + in_len; \
00540   int errs = 0; \
00541   \
00542   BM_CHECK(fp_len < in_len); \
00543   _init_hash_; \
00544   \
00545   for (; ip < in_end - fp_len - 1; ++ip) { \
00546     size_t h0 = _rehash_; \
00547     hash = _update_hash_; \
00548     if (h0 != hash) { \
00549       BM_LOG(0, "mismatch at pos %ld: %lx vs %lx\n", \
00550              (long)(ip - in), (long)h0, (long)hash); \
00551       ++errs; \
00552     } \
00553   } \
00554   BM_LOG(1, "total errors: %d\n", errs); \
00555   return errs ? BMZ_E_ERROR : BMZ_E_OK
00556 
00557 #define BM_HASH_BENCH_BODY(_init_hash_, _update_hash_) \
00558   size_t hash; \
00559   Byte *in = (Byte *)src, *ip = in, *in_end = in + in_len; \
00560   \
00561   BM_CHECK(fp_len < in_len); \
00562   _init_hash_; \
00563   \
00564   for (; ip < in_end - fp_len - 1; ++ip) { \
00565     hash = _update_hash_; \
00566   } \
00567   BM_LOG(1, "last hash: %lx\n", (long)hash) /* or the loop gets optimized out */
00568 
00569 int
00570 bmz_check_hash_mod(const void *src, size_t in_len, size_t fp_len,
00571                    size_t b, size_t m) {
00572   size_t pow_n;
00573 
00574   BM_HASH_CHECK_BODY(hash = r_hash_mod(in, fp_len, b, m);
00575                      pow_n = pow_mod(b, fp_len, m),
00576                      r_hash_mod(ip + 1, fp_len, b, m),
00577                      update_hash_mod(hash, ip[fp_len], *ip, pow_n, b, m));
00578 }
00579 
00580 void
00581 bmz_bench_hash_mod(const void *src, size_t in_len, size_t fp_len) {
00582   UInt64 b = BM_B1, m = BM_MASKSZ;
00583   size_t pow_n;
00584 
00585   BM_HASH_BENCH_BODY(hash = r_hash_mod(in, fp_len, b, m);
00586                      pow_n = pow_mod(b, fp_len, m),
00587                      update_hash_mod(hash, ip[fp_len], *ip, pow_n, b, m));
00588 }
00589 
00590 int
00591 bmz_check_hash_mod16x2(const void *src, size_t in_len, size_t fp_len,
00592                        size_t b1, size_t b2, size_t m1, size_t m2) {
00593   size_t pow_n_b1, pow_n_b2;
00594 
00595   BM_HASH_CHECK_BODY(hash = r_hash_mod16x2(in, fp_len, b1, b2, m1, m2);
00596                      pow_n_b1 = pow_mod32(b1, fp_len, m1);
00597                      pow_n_b2 = pow_mod32(b2, fp_len, m2),
00598                      r_hash_mod16x2(ip + 1, fp_len, b1, b2, m1, m2),
00599                      update_hash_mod16x2(hash, ip[fp_len], *ip, pow_n_b1,
00600                                          pow_n_b2, b1, b2, m1, m2));
00601 }
00602 
00603 void
00604 bmz_bench_hash_mod16x2(const void *src, size_t in_len, size_t fp_len) {
00605   size_t pow_n_b1, pow_n_b2, b1 = BM_B1, b2 = BM_B2, m1 = BM_M1, m2 = BM_M2;
00606 
00607   BM_HASH_BENCH_BODY(hash = r_hash_mod16x2(in, fp_len, b1, b2, m1, m2);
00608                      pow_n_b1 = pow_mod32(b1, fp_len, m1);
00609                      pow_n_b2 = pow_mod32(b2, fp_len, m2),
00610                      update_hash_mod16x2(hash, ip[fp_len], *ip, pow_n_b1,
00611                                          pow_n_b2, b1, b2, m1, m2));
00612 }
00613 
00614 int
00615 bmz_check_hash_mask16x2(const void *src, size_t in_len, size_t fp_len,
00616                         size_t b1, size_t b2) {
00617   size_t pow_n_b1, pow_n_b2;
00618 
00619   BM_HASH_CHECK_BODY(hash = r_hash_mask16x2(in, fp_len, b1, b2);
00620                      pow_n_b1 = pow_mask32(b1, fp_len, BM_MASK16);
00621                      pow_n_b2 = pow_mask32(b2, fp_len, BM_MASK16),
00622                      r_hash_mask16x2(ip + 1, fp_len, b1, b2),
00623                      update_hash_mask16x2(hash, ip[fp_len], *ip, pow_n_b1,
00624                                           pow_n_b2, b1, b2));
00625 }
00626 
00627 void
00628 bmz_bench_hash_mask16x2(const void *src, size_t in_len, size_t fp_len) {
00629   size_t pow_n_b1, pow_n_b2, b1 = BM_B1, b2 = BM_B2;
00630 
00631   BM_HASH_BENCH_BODY(hash = r_hash_mask16x2(in, fp_len, b1, b2);
00632                      pow_n_b1 = pow_mask32(b1, fp_len, BM_MASK16);
00633                      pow_n_b2 = pow_mask32(b2, fp_len, BM_MASK16),
00634                      update_hash_mask16x2(hash, ip[fp_len], *ip, pow_n_b1,
00635                                           pow_n_b2, b1, b2));
00636 }
00637 
00638 int
00639 bmz_check_hash_mask(const void *src, size_t in_len, size_t fp_len,
00640                     size_t b) {
00641   size_t pow_n, m = BM_MASKSZ;
00642 
00643   BM_HASH_CHECK_BODY(hash = r_hash_mask(in, fp_len, b, m);
00644                      pow_n = pow_mask(b, fp_len, m),
00645                      r_hash_mask(ip + 1, fp_len, b, m),
00646                      update_hash_mask(hash, ip[fp_len], *ip, pow_n, b, m));
00647 }
00648 
00649 void
00650 bmz_bench_hash_mask(const void *src, size_t in_len, size_t fp_len) {
00651   size_t pow_n, b = BM_B1, m = BM_MASKSZ;
00652 
00653   BM_HASH_BENCH_BODY(hash = r_hash_mask(in, fp_len, b, m);
00654                      pow_n = pow_mask(b, fp_len, m),
00655                      update_hash_mask(hash, *ip, ip[fp_len], pow_n, b, m));
00656 }
00657 
00658 int
00659 bmz_check_hash_mask32x2(const void *src, size_t in_len, size_t fp_len,
00660                         size_t b1, size_t b2) {
00661   size_t pow_n_b1, pow_n_b2;
00662 
00663   BM_HASH_CHECK_BODY(hash = r_hash_mask32x2(in, fp_len, b1, b2);
00664                      pow_n_b1 = pow_mask(b1, fp_len, BM_MASK32);
00665                      pow_n_b2 = pow_mask(b2, fp_len, BM_MASK32),
00666                      r_hash_mask32x2(ip + 1, fp_len, b1, b2),
00667                      update_hash_mask32x2(hash, ip[fp_len], *ip, pow_n_b1,
00668                                           pow_n_b2, b1, b2));
00669 }
00670 
00671 void
00672 bmz_bench_hash_mask32x2(const void *src, size_t in_len, size_t fp_len) {
00673   size_t pow_n_b1, pow_n_b2, b1 = BM_B1, b2 = BM_B2;
00674 
00675   BM_HASH_BENCH_BODY(hash = r_hash_mask32x2(in, fp_len, b1, b2);
00676                      pow_n_b1 = pow_mask(b1, fp_len, BM_MASK32);
00677                      pow_n_b2 = pow_mask(b2, fp_len, BM_MASK32),
00678                      update_hash_mask32x2(hash, ip[fp_len], *ip, pow_n_b1,
00679                                           pow_n_b2, b1, b2));
00680 }
00681 
00682 void
00683 bmz_bench_hash(const void *in, size_t in_len, unsigned type) {
00684   size_t fp_len = 100;
00685 
00686   switch (type) {
00687   case BMZ_HASH_MOD:
00688     bmz_bench_hash_mod(in, in_len, fp_len);
00689     break;
00690   case BMZ_HASH_MOD16X2:
00691     bmz_bench_hash_mod16x2(in, in_len, fp_len);
00692     break;
00693   case BMZ_HASH_MASK16X2:
00694     bmz_bench_hash_mask16x2(in, in_len, fp_len);
00695     break;
00696   case BMZ_HASH_MASK:
00697     bmz_bench_hash_mask(in, in_len, fp_len);
00698     break;
00699   case BMZ_HASH_MASK32X2:
00700     bmz_bench_hash_mask32x2(in, in_len, fp_len);
00701   }
00702 }
00703 
00704 /* Benchmark lookup table performance */
00705 #define BM_LUT_BENCH_BODY(_init_hash_, _update_hash_) \
00706   size_t hash; \
00707   Byte *in = (Byte *)src, *ip = in, *in_end = in + in_len; \
00708   size_t scan_len = in_len - fp_len, c, boundary; \
00709   BmLut lut; \
00710   BmLutEntry *entry; \
00711   \
00712   BM_CHECK(fp_len < in_len); \
00713   init_bmlut(&lut, in_len / fp_len, work_mem); \
00714   _init_hash_; \
00715   \
00716   for (boundary = fp_len; ip < in_end - fp_len - 1; ++ip) { \
00717     size_t pos = ip - in; \
00718     BM_LOOKUP(lut, hash, entry); \
00719     \
00720     if (entry->pos != BM_NPOS) { \
00721       if (memcmp(in + entry->pos, ip, fp_len) != 0) ++lut.collisions; \
00722     } \
00723     if (pos == boundary) { \
00724       entry->hash = hash; \
00725       entry->pos = pos; \
00726       boundary += fp_len; \
00727     } \
00728     hash = _update_hash_; \
00729   } \
00730   c = lut.collisions; \
00731   BM_LOG(1, "length: %llu, lut.size: %llu, lut.collisions: %llu (eff. bits: " \
00732          "%.3f), lut.probes: %llu (%.3f/lu)\n", (Llu)in_len, (Llu)lut.size, \
00733          (Llu)c, c ? log((double)in_len / fp_len * scan_len / c) / log(2) \
00734                    : sizeof(size_t) * 8., \
00735          (Llu)lut.probes, (double)lut.probes / scan_len)
00736 
00737 void
00738 bmz_bench_lut_mod(const void *src, size_t in_len, size_t fp_len,
00739                   void *work_mem, size_t b, size_t m) {
00740   size_t pow_n;
00741 
00742   BM_LUT_BENCH_BODY(hash = r_hash_mod(in, fp_len, b, m);
00743                     pow_n = pow_mod(b, fp_len, m),
00744                     update_hash_mod(hash, ip[fp_len], *ip, pow_n, b, m));
00745 }
00746 
00747 void
00748 bmz_bench_lut_mod16x2(const void *src, size_t in_len, size_t fp_len,
00749                       void *work_mem, size_t b1, size_t b2,
00750                       size_t m1, size_t m2) {
00751   size_t pow_n_b1, pow_n_b2;
00752 
00753   BM_LUT_BENCH_BODY(hash = r_hash_mod16x2(in, fp_len, b1, b2, m1, m2);
00754                     pow_n_b1 = pow_mod32(b1, fp_len, m1);
00755                     pow_n_b2 = pow_mod32(b2, fp_len, m2),
00756                     update_hash_mod16x2(hash, ip[fp_len], *ip, pow_n_b1,
00757                                         pow_n_b2, b1, b2, m1, m2));
00758 }
00759 
00760 void
00761 bmz_bench_lut_mask16x2(const void *src, size_t in_len, size_t fp_len,
00762                    void *work_mem, size_t b1, size_t b2) {
00763   size_t pow_n_b1, pow_n_b2;
00764 
00765   BM_LUT_BENCH_BODY(hash = r_hash_mask16x2(in, fp_len, b1, b2);
00766                     pow_n_b1 = pow_mask32(b1, fp_len, BM_MASK16);
00767                     pow_n_b2 = pow_mask32(b2, fp_len, BM_MASK16),
00768                     update_hash_mask16x2(hash, ip[fp_len], *ip, pow_n_b1,
00769                                          pow_n_b2, b1, b2));
00770 }
00771 
00772 void
00773 bmz_bench_lut_mask(const void *src, size_t in_len, size_t fp_len,
00774                    void *work_mem, size_t b) {
00775   size_t pow_n, m = BM_MASKSZ;
00776 
00777   BM_LUT_BENCH_BODY(hash = r_hash_mask(in, fp_len, b, m);
00778                     pow_n = pow_mask(b, fp_len, m),
00779                     update_hash_mask(hash, *ip, ip[fp_len], pow_n, b, m));
00780 }
00781 
00782 void
00783 bmz_bench_lut_mask32x2(const void *src, size_t in_len, size_t fp_len,
00784                        void *work_mem, size_t b1, size_t b2) {
00785   size_t pow_n_b1, pow_n_b2;
00786 
00787   BM_LUT_BENCH_BODY(hash = r_hash_mask32x2(in, fp_len, b1, b2);
00788                     pow_n_b1 = pow_mask(b1, fp_len, BM_MASK32);
00789                     pow_n_b2 = pow_mask(b2, fp_len, BM_MASK32),
00790                     update_hash_mask32x2(hash, ip[fp_len], *ip, pow_n_b1,
00791                                          pow_n_b2, b1, b2));
00792 }
00793 
00794 /* One way to debug macro laden code:
00795  * gcc -E % | sed '/^\#/d' | gnuindent -st -i2 -kr > bmzpp.c
00796  */
00797 
00798 #define BM_NEED_IN(_n_) \
00799   if ((int)(in_end - ip) < (int)(_n_)) goto input_overrun
00800 
00801 #define BM_NEED_OUT(_n_) \
00802   if ((int)(out_end - op) < (int)(_n_)) goto output_overrun
00803 
00804 #define BM_ENCODE_OUTPUT(_op_, _ip_, _ip_end_) \
00805   while (_ip_ < _ip_end_) { \
00806     if (*_ip_ == BM_ESC) { \
00807       BM_NEED_OUT(1); \
00808       *_op_++ = BM_ESC; \
00809     } \
00810     BM_NEED_OUT(1); \
00811     *_op_++ = *_ip_++; \
00812   }
00813 
00814 /* Less comparison and earlier termination for uncompressible stuff */
00815 #define BM_ENCODE_FINAL_OUTPUT(_op_, _ip_, _ip_end_) \
00816   BM_NEED_OUT((_ip_end_) - (_ip_)); /* at least */ \
00817   while (_ip_ < _ip_end_) { \
00818     if (*_ip_ == BM_ESC) { \
00819       BM_NEED_OUT((_ip_end_) - (_ip_) + 1); \
00820       *_op_++ = BM_ESC; \
00821     } \
00822     *_op_++ = *_ip_++; \
00823   }
00824 
00825 #define BM_ENCODE_PASS do { \
00826   *out = 0; \
00827   memcpy(out + 1, in, in_len); \
00828   *out_len_p = in_len + 1; \
00829   BM_ENCODE_STAT("no reduction"); \
00830   return BMZ_E_OK; \
00831 } while (0)
00832 
00833 #define BM_ENCODE_INT(_op_, _n_, _width_) do { \
00834   UInt64 _n = _n_; \
00835   switch (_width_) { \
00836   case 1: \
00837     *_op_++ = (Byte) _n; \
00838     break; \
00839   case 2: \
00840     *_op_++ = (Byte)(_n >> 8); \
00841     *_op_++ = (Byte)(_n); \
00842     break; \
00843   case 3: \
00844     *_op_++ = (Byte)(_n >> 16); \
00845     *_op_++ = (Byte)(_n >> 8); \
00846     *_op_++ = (Byte)(_n); \
00847     break; \
00848   case 4: \
00849     *_op_++ = (Byte)(_n >> 24); \
00850     *_op_++ = (Byte)(_n >> 16); \
00851     *_op_++ = (Byte)(_n >> 8); \
00852     *_op_++ = (Byte)(_n); \
00853     break; \
00854   case 5: \
00855     *_op_++ = (Byte)(_n >> 32); \
00856     *_op_++ = (Byte)(_n >> 24); \
00857     *_op_++ = (Byte)(_n >> 16); \
00858     *_op_++ = (Byte)(_n >> 8); \
00859     *_op_++ = (Byte)(_n); \
00860     break; \
00861   case 6: \
00862     *_op_++ = (Byte)(_n >> 40); \
00863     *_op_++ = (Byte)(_n >> 32); \
00864     *_op_++ = (Byte)(_n >> 24); \
00865     *_op_++ = (Byte)(_n >> 16); \
00866     *_op_++ = (Byte)(_n >> 8); \
00867     *_op_++ = (Byte)(_n); \
00868     break; \
00869   default: \
00870     BM_DIE("%s", "256TB ought to be enough for anyone!"); \
00871   } \
00872 } while (0)
00873 
00874 #define BM_ENCODE_VINT(_op_, _n_, _width_) do { \
00875   UInt64 _n = _n_; \
00876   switch (_width_) { \
00877   case 1: \
00878     *_op_++ = (Byte)((_n) & 0x7f); \
00879     break; \
00880   case 2: \
00881     *_op_++ = (Byte)(_n | 0x80); \
00882     *_op_++ = (Byte)((_n >> 7) & 0x7f); \
00883     break; \
00884   case 3: \
00885     *_op_++ = (Byte)(_n | 0x80); \
00886     *_op_++ = (Byte)((_n >> 7) | 0x80); \
00887     *_op_++ = (Byte)((_n >> 14) & 0x7f); \
00888     break; \
00889   case 4: \
00890     *_op_++ = (Byte)(_n | 0x80); \
00891     *_op_++ = (Byte)((_n >> 7) | 0x80); \
00892     *_op_++ = (Byte)((_n >> 14) | 0x80); \
00893     *_op_++ = (Byte)((_n >> 21) & 0x7f); \
00894     break; \
00895   case 5: \
00896     *_op_++ = (Byte)(_n | 0x80); \
00897     *_op_++ = (Byte)((_n >> 7) | 0x80); \
00898     *_op_++ = (Byte)((_n >> 14) | 0x80); \
00899     *_op_++ = (Byte)((_n >> 21) | 0x80); \
00900     *_op_++ = (Byte)((_n >> 28) & 0x7f); \
00901     break; \
00902   case 6: \
00903     *_op_++ = (Byte)(_n | 0x80); \
00904     *_op_++ = (Byte)((_n >> 7) | 0x80); \
00905     *_op_++ = (Byte)((_n >> 14) | 0x80); \
00906     *_op_++ = (Byte)((_n >> 21) | 0x80); \
00907     *_op_++ = (Byte)((_n >> 28) | 0x80); \
00908     *_op_++ = (Byte)((_n >> 35) & 0x7f); \
00909     break; \
00910   case 7: \
00911     *_op_++ = (Byte)(_n | 0x80); \
00912     *_op_++ = (Byte)((_n >> 7) | 0x80); \
00913     *_op_++ = (Byte)((_n >> 14) | 0x80); \
00914     *_op_++ = (Byte)((_n >> 21) | 0x80); \
00915     *_op_++ = (Byte)((_n >> 28) | 0x80); \
00916     *_op_++ = (Byte)((_n >> 35) | 0x80); \
00917     *_op_++ = (Byte)((_n >> 42) & 0x7f); \
00918     break; \
00919   default: \
00920     BM_DIE("%s", "512TB ought to be enough for anyone!"); \
00921   } \
00922 } while (0)
00923 
00924 #define BM_DECODE_VINT(_n_) do { \
00925   BM_NEED_IN(1); \
00926   _n_ = (*ip++ & 0x7f); \
00927   if (ip[-1] & 0x80) { \
00928     BM_NEED_IN(1); \
00929     _n_ |= ((*ip++ & 0x7f) << 7); \
00930   } else break; \
00931   if (ip[-1] & 0x80) { \
00932     BM_NEED_IN(1); \
00933     _n_ |= ((*ip++ & 0x7f) << 14); \
00934   } else break; \
00935   if (ip[-1] & 0x80) { \
00936     BM_NEED_IN(1); \
00937     _n_ |= ((*ip++ & 0x7f) << 21); \
00938   } else break; \
00939   if (ip[-1] & 0x80) { \
00940     BM_NEED_IN(1); \
00941     _n_ |= ((UInt64)(*ip++ & 0x7f) << 28); \
00942   } else break; \
00943   if (ip[-1] & 0x80) { \
00944     BM_NEED_IN(1); \
00945     _n_ |= ((UInt64)(*ip++ & 0x7f) << 35); \
00946   } else break; \
00947   if (ip[-1] & 0x80) { \
00948     BM_NEED_IN(1); \
00949     _n_ |= ((UInt64)(*ip++ & 0x7f) << 42); \
00950   } \
00951 } while (0)
00952 
00953 #define BM_INT_WIDTH(_n_) \
00954   (_n_ > BM_MAX_5B ? 6 : \
00955    (_n_ > BM_MAX_4B ? 5 : \
00956     (_n_ > BM_MAX_3B ? 4 : \
00957      (_n_ > BM_MAX_2B ? 3 : \
00958       (_n_ > BM_MAX_1B ? 2 : 1)))))
00959 
00960 #define BM_VINT_WIDTH(_n_) \
00961   (_n_ > BM_MAX_V6B ? 7 : \
00962    (_n_ > BM_MAX_V5B ? 6 : \
00963     (_n_ > BM_MAX_V4B ? 5 : \
00964      (_n_ > BM_MAX_V3B ? 4 : \
00965       (_n_ > BM_MAX_V2B ? 3 : \
00966        (_n_ > BM_MAX_V1B ? 2 : 1))))))
00967 
00968 #define BM_ENCODE_DELTA(_op_, _ipos_, _pos_, _len_) do { \
00969   UInt64 _ipos = _ipos_, _len = _len_; \
00970   int pos_w = BM_INT_WIDTH(_ipos); \
00971   int len_w = BM_VINT_WIDTH(_len); \
00972   BM_NEED_OUT(pos_w + len_w + 1); \
00973   *_op_++ = BM_ESC; \
00974   BM_ENCODE_INT(_op_, _pos_, pos_w); \
00975   BM_ENCODE_VINT(_op_, _len_, len_w); \
00976 } while (0)
00977 
00978 #define BM_HANDLE_COLLISIONS(_randomize_, _init_hash_) do { \
00979   size_t adapts = lut.adapts, collisions = ++lut.collisions; \
00980   \
00981   if (adapts > BM_COLLISION_ABORT_THRESH) { \
00982     /* stumped & give up, just pass the bytes */ \
00983     BM_LOG(1, "too much collisions: %llu, giving up.\n", \
00984            (Llu)BM_ABORT_COLLISIONS); \
00985     goto output_overrun; \
00986   } \
00987   else if (collisions > collision_thresh) { \
00988     /* unlikely when the hash is good, so reinitialize hash, in case \
00989      * data is wacked/adversarial for some reasons. \
00990      */ \
00991     BM_LOG(2, "hash collision %llu: %llx: pos: %llu and %llu: [" BM_COLOR_DBG, \
00992            (Llu)lut.collisions, (Llu)hash, (Llu)pos, (Llu)i); \
00993     BM_LOG_(2, in + pos, fp_len); \
00994     BM_LOG(2, "%s", BM_COLOR_END "] vs [" BM_COLOR_ALT); \
00995     BM_LOG_(2, in + i, fp_len); \
00996     BM_LOG(2, "%s", BM_COLOR_END "] trying to adapt.\n"); \
00997     _randomize_; \
00998     boundary = last_i = i = offset; \
00999     _init_hash_; \
01000     init_bmlut(&lut, in_len / fp_len, work_mem); \
01001     lut.adapts = adapts + 1; \
01002     op = out + 1; \
01003     last_ip = in; \
01004     continue; \
01005   } \
01006 } while (0)
01007 
01008 #define BM_ENCODE_STAT(_msg_) do { \
01009   size_t c = lut.collisions; \
01010   BM_LOG(1, "%s: in: %llu, out: %llu, lut.collisions: %llu (eff. bits: %.3f) " \
01011          "thresh: %llu), lut.probes: %llu (%.3f/lu)\n", _msg_, (Llu)in_len, \
01012          (Llu)*out_len_p, (Llu)c, \
01013          c ? log((double)nfps * scan_len / c) / log(2) : sizeof(size_t) * 8., \
01014          (Llu)collision_thresh, (Llu)lut.probes, \
01015          (double)lut.probes / scan_len); \
01016 } while (0)
01017 
01018 /* The main algorithm */
01019 #define BM_ENCODE_BODY(_init_hash_, _update_hash_, _randomize_) \
01020   size_t i = offset, last_i = i, end_i = in_len - fp_len; \
01021   size_t boundary = i, scan_len = end_i - offset; \
01022   size_t nfps, hash, pos, collision_thresh; \
01023   Byte *in = (Byte *) src, *out = (Byte *) dst; \
01024   Byte *ip = in, *last_ip = ip, *in_end = in + in_len; \
01025   Byte *op = out, *out_end; \
01026   BmLut lut = { 0, 0, 0, 0, NULL }; \
01027   BmLutEntry *entry = NULL; \
01028   \
01029   BM_CHECK(fp_len > 0); \
01030   nfps = in_len / fp_len; \
01031   collision_thresh = s_collision_thresh ? s_collision_thresh \
01032                                         : scan_len / 1e9 * nfps + 8; \
01033   if (in_len <= offset + fp_len) BM_ENCODE_PASS; \
01034   \
01035   init_bmlut(&lut, nfps, work_mem); \
01036   _init_hash_; \
01037   \
01038   *op++ = 1; /* default */ \
01039   out_end = op + (*out_len_p < in_len ? *out_len_p : in_len);  \
01040   \
01041   for (; i <= end_i; ++i) { \
01042     if (i > last_i) { \
01043       BM_LOOKUP(lut, hash, entry); \
01044       pos = entry->pos; \
01045       \
01046       if (pos != BM_NPOS) { \
01047         Byte *ip0 = in + pos, *cp = in + i, *cp0 = cp; \
01048         Byte *ip1 = ip0 + fp_len, *cp1 = cp0 + fp_len; \
01049         \
01050         if (memcmp(ip0, cp0, fp_len) == 0) { \
01051           /* we have a match */ \
01052           size_t mlen = 0; \
01053           /* greedy backward up to fp_len - 1 */ \
01054           for (ip = ip0; ip > in && cp > last_ip && \
01055                mlen < fp_len - 1 && *--ip == *--cp; ++mlen); \
01056           cp = cp0 - mlen; \
01057           BM_ENCODE_OUTPUT(op, last_ip, cp); \
01058           pos = ip0 - mlen - in; \
01059           mlen += fp_len; \
01060           /* greedy forward as much as possible */ \
01061           for (ip = ip1, cp = cp1; cp < in_end && *ip++ == *cp++; ++mlen); \
01062           BM_ENCODE_DELTA(op, last_ip - in, pos, mlen); \
01063           last_ip = cp0 - (ip0 - (in + pos)) + mlen; \
01064           if (last_ip == in_end) break; \
01065           last_i = last_ip - in; \
01066         } \
01067         else BM_HANDLE_COLLISIONS(_randomize_, _init_hash_); \
01068       } \
01069     } \
01070     if ((i - offset) == boundary) { \
01071       if (i <= last_i) BM_LOOKUP(lut, hash, entry); \
01072       /* insert hash */ \
01073       entry->hash = hash; \
01074       entry->pos = i; \
01075       boundary += fp_len; \
01076     } \
01077     if (i < end_i) hash = _update_hash_; /* for next iter */ \
01078   } \
01079   /* copy the remaining bytes */ \
01080   BM_ENCODE_FINAL_OUTPUT(op, last_ip, in_end); \
01081   *out_len_p = op - out; \
01082   BM_ENCODE_STAT("success"); \
01083   \
01084   return BMZ_E_OK; \
01085   \
01086 output_overrun: \
01087   if (*out_len_p > in_len) BM_ENCODE_PASS; \
01088   BM_ENCODE_STAT("tip: make output buffer at least input length + 1"); \
01089   return BMZ_E_OUTPUT_OVERRUN
01090 
01091 /* External APIs for bm encoding */
01092 int
01093 bmz_bm_pack_mod(const void *src, size_t in_len, void *dst, size_t *out_len_p,
01094                 size_t offset, size_t fp_len, void *work_mem,
01095                 size_t b, size_t m) {
01096   size_t pow_n;
01097   BM_ENCODE_BODY(hash = r_hash_mod(in + i, fp_len, b, m);
01098                  pow_n = pow_mod(b, fp_len, m),
01099                  update_hash_mod(hash, in[i + fp_len], in[i], pow_n, b, m),
01100                  b = random_prime(0));
01101 }
01102 
01103 int
01104 bmz_bm_pack_mod16x2(const void *src, size_t in_len, void *dst,
01105                     size_t *out_len_p, size_t offset, size_t fp_len,
01106                     void *work_mem, size_t b1, size_t b2,
01107                     size_t m1, size_t m2) {
01108   size_t pow_n_b1, pow_n_b2;
01109   BM_ENCODE_BODY(hash = r_hash_mod16x2(in + i, fp_len, b1, b2, m1, m2);
01110                  pow_n_b1 = pow_mod32(b1, fp_len, m1);
01111                  pow_n_b2 = pow_mod32(b2, fp_len, m2),
01112                  update_hash_mod16x2(hash, in[i + fp_len], in[i],
01113                                      pow_n_b1, pow_n_b2, b1, b2, m1, m2),
01114                  b1 = random_prime(0);
01115                  b2 = random_prime(b1));
01116 }
01117 
01118 int
01119 bmz_bm_pack_mask16x2(const void *src, size_t in_len, void *dst,
01120                      size_t *out_len_p, size_t offset, size_t fp_len,
01121                      void *work_mem, size_t b1, size_t b2) {
01122   size_t pow_n_b1, pow_n_b2;
01123   BM_ENCODE_BODY(hash = r_hash_mask16x2(in + i, fp_len, b1, b2);
01124                  pow_n_b1 = pow_mask32(b1, fp_len, BM_MASK16);
01125                  pow_n_b2 = pow_mask32(b2, fp_len, BM_MASK16),
01126                  update_hash_mask16x2(hash, in[i + fp_len], in[i],
01127                                       pow_n_b1, pow_n_b2, b1, b2),
01128                  b1 = random_prime(0);
01129                  b2 = random_prime(b1));
01130 }
01131 
01132 int
01133 bmz_bm_pack_mask(const void *src, size_t in_len, void *dst, size_t *out_len_p,
01134                    size_t offset, size_t fp_len, void *work_mem, size_t b) {
01135   size_t pow_n;
01136   BM_ENCODE_BODY(hash = r_hash_mask(in + i, fp_len, b, BM_MASKSZ);
01137                  pow_n = pow_mask(b, fp_len, BM_MASKSZ),
01138                  update_hash_mask(hash, in[i + fp_len], in[i],
01139                                   pow_n, b, BM_MASKSZ),
01140                  b = random_prime(0));
01141 }
01142 
01143 int
01144 bmz_bm_pack_mask32x2(const void *src, size_t in_len, void *dst,
01145                      size_t *out_len_p, size_t offset, size_t fp_len,
01146                      void *work_mem, size_t b1, size_t b2) {
01147   size_t pow_n_b1, pow_n_b2;
01148   BM_ENCODE_BODY(hash = r_hash_mask32x2(in + i, fp_len, b1, b2);
01149                  pow_n_b1 = pow_mask(b1, fp_len, BM_MASK32);
01150                  pow_n_b2 = pow_mask(b2, fp_len, BM_MASK32),
01151                  update_hash_mask32x2(hash, in[i + fp_len], in[i],
01152                                       pow_n_b1, pow_n_b2, b1, b2),
01153                  b1 = random_prime(0);
01154                  b2 = random_prime(b1));
01155 }
01156 
01157 /* Max lz overhead for incompressible block */
01158 #define BM_LZ_MAX(_len_) ((_len_) / 16 + 64 + 3)
01159 
01160 size_t
01161 bmz_pack_buflen(size_t in_len) {
01162   return in_len + BM_LZ_MAX(in_len) + 1;
01163 }
01164 
01165 
01166 
01167 /* Workmem (lut) for bm pack */
01168 size_t
01169 bmz_bm_pack_worklen(size_t in_len, size_t fp_len) {
01170   return bm_lut_size(in_len / fp_len) * sizeof(BmLutEntry);
01171 }
01172 
01173 /* Size for compressor auxiliary memory */
01174 static size_t
01175 bmz_pack_auxlen(size_t in_len, size_t fp_len) {
01176   size_t lz_worklen = bmz_lz_pack_worklen(in_len) + 16;
01177   size_t bm_worklen = bmz_bm_pack_worklen(in_len, fp_len) + 16;
01178   return bm_worklen > lz_worklen ? bm_worklen : lz_worklen;
01179 }
01180 
01181 /* Including temporary space for bm output as well */
01182 size_t
01183 bmz_pack_worklen(size_t in_len, size_t fp_len) {
01184   BM_CHECK(fp_len > 0);
01185   return in_len + bmz_pack_auxlen(in_len, fp_len);
01186 }
01187 
01188 size_t
01189 bmz_unpack_worklen(size_t out_len) {
01190   return out_len + 1;
01191 }
01192 
01193 #define BMZ_PACK_BODY(_bm_pack_) \
01194   size_t tlen = in_len + 1; \
01195   Byte *dst = (Byte *)work_mem; \
01196   Byte *aux_mem = dst + tlen; \
01197   Byte *work_mem_aligned = BM_ALIGN(aux_mem, 8); \
01198   int ret; \
01199   \
01200   /* overlap flag assume the following memory layout    \
01201    * |=============== input/output =================|   \
01202    * |<------------- bmz_pack_buflen -------------->|   \
01203    * |<---------------- in_len ---------------->|       \
01204    * |<-- *out_len_p -->|                               \
01205    */                                                   \
01206   ret = _bm_pack_; \
01207   if (ret != BMZ_E_OK) return ret; \
01208   return bmz_lz_pack(dst, tlen, out, out_len_p, work_mem_aligned)
01209 
01210 int
01211 bmz_pack_mod(const void *in, size_t in_len, void *out, size_t *out_len_p,
01212              size_t offset, size_t fp_len, unsigned flags, void *work_mem,
01213              size_t b, size_t m) {
01214   BMZ_PACK_BODY(bmz_bm_pack_mod(in, in_len, dst, &tlen,
01215                 offset, fp_len, work_mem_aligned, b, m));
01216 }
01217 
01218 int
01219 bmz_pack_mod16x2(const void *in, size_t in_len, void *out, size_t *out_len_p,
01220                  size_t offset, size_t fp_len, unsigned flags, void *work_mem,
01221                  size_t b1, size_t b2, size_t m1, size_t m2) {
01222   BMZ_PACK_BODY(bmz_bm_pack_mod16x2(in, in_len, dst, &tlen,
01223                 offset, fp_len, work_mem_aligned, b1, b2, m1, m2));
01224 }
01225 
01226 int
01227 bmz_pack_mask16x2(const void *in, size_t in_len, void *out, size_t *out_len_p,
01228                   size_t offset, size_t fp_len, unsigned flags, void *work_mem,
01229                   size_t b1, size_t b2) {
01230   BMZ_PACK_BODY(bmz_bm_pack_mask16x2(in, in_len, dst, &tlen,
01231                 offset, fp_len, work_mem_aligned, b1, b2));
01232 }
01233 
01234 int
01235 bmz_pack_mask(const void *in, size_t in_len, void *out, size_t *out_len_p,
01236               size_t offset, size_t fp_len, unsigned flags, void *work_mem,
01237               size_t b) {
01238   BMZ_PACK_BODY(bmz_bm_pack_mask(in, in_len, dst, &tlen,
01239                 offset, fp_len, work_mem_aligned, b));
01240 }
01241 
01242 int
01243 bmz_pack_mask32x2(const void *in, size_t in_len, void *out, size_t *out_len_p,
01244                   size_t offset, size_t fp_len, unsigned flags, void *work_mem,
01245                   size_t b1, size_t b2) {
01246   BMZ_PACK_BODY(bmz_bm_pack_mask32x2(in, in_len, dst, &tlen,
01247                 offset, fp_len, work_mem_aligned, b1, b2));
01248 }
01249 
01250 int
01251 bmz_pack(const void *in, size_t in_len, void *out, size_t *out_len_p,
01252          size_t offset, size_t fp_len, unsigned flags, void *work_mem) {
01253   switch (flags >> 24) {
01254   case BMZ_HASH_MOD:
01255     return bmz_pack_mod(in, in_len, out, out_len_p, offset, fp_len, flags,
01256                         work_mem, BM_B1, BM_M1);
01257   case BMZ_HASH_MOD16X2:
01258     return bmz_pack_mod16x2(in, in_len, out, out_len_p, offset, fp_len, flags,
01259                             work_mem, BM_B1, BM_B2, BM_M1, BM_M2);
01260   case BMZ_HASH_MASK16X2:
01261     return bmz_pack_mask16x2(in, in_len, out, out_len_p, offset, fp_len, flags,
01262                              work_mem, BM_B1, BM_B2);
01263   case 0: /* default */
01264   case BMZ_HASH_MASK:
01265     return bmz_pack_mask(in, in_len, out, out_len_p, offset, fp_len, flags,
01266                          work_mem, BM_B1);
01267   case BMZ_HASH_MASK32X2:
01268     return bmz_pack_mask32x2(in, in_len, out, out_len_p, offset, fp_len, flags,
01269                              work_mem, BM_B1, BM_B2);
01270   default:
01271     BM_DIE("unknown hash algorithm: %u", (flags >> 24));
01272   }
01273   return BMZ_E_ERROR;
01274 }
01275 
01276 int
01277 bmz_unpack(const void *in, size_t in_len, void *out, size_t *out_len_p,
01278            void *work_mem) {
01279   Byte *dst = (Byte *)work_mem;
01280   size_t tlen = *out_len_p + 1;
01281   int ret;
01282 
01283   ret = bmz_lz_unpack((Byte *)in, in_len, dst, &tlen);
01284   if (ret != BMZ_E_OK) return ret;
01285   return bmz_bm_unpack(dst, tlen, out, out_len_p);
01286 }
01287 
01288 
01289 #define BM_DECODE_POS(_cpos_, _n_) do { \
01290   UInt64 _ipos = _cpos_; \
01291   int w = BM_INT_WIDTH(_ipos); \
01292   BM_NEED_IN(w); \
01293   \
01294   switch (w) { \
01295   case 1: \
01296     _n_ = *ip++; \
01297     break; \
01298   case 2: \
01299     _n_ = (*ip++ << 8); \
01300     _n_ |= *ip++; \
01301     break; \
01302   case 3: \
01303     _n_ = (*ip++ << 16); \
01304     _n_ |= (*ip++ << 8); \
01305     _n_ |= *ip++; \
01306     break; \
01307   case 4: \
01308     _n_ = (*ip++ << 24); \
01309     _n_ |= (*ip++ << 16); \
01310     _n_ |= (*ip++ << 8); \
01311     _n_ |= *ip++; \
01312     break; \
01313   case 5: \
01314     _n_ = ((UInt64)*ip++ << 32); \
01315     _n_ |= (*ip++ << 24); \
01316     _n_ |= (*ip++ << 16); \
01317     _n_ |= (*ip++ << 8); \
01318     _n_ |= *ip++; \
01319     break; \
01320   case 6: \
01321     _n_ = ((UInt64)*ip++ << 40); \
01322     _n_ |= ((UInt64)*ip++ << 32); \
01323     _n_ |= (*ip++ << 24); \
01324     _n_ |= (*ip++ << 16); \
01325     _n_ |= (*ip++ << 8); \
01326     _n_ |= *ip++; \
01327   default: \
01328     BM_DIE("%s", "256TB ought to be enough for anyone!"); \
01329   } \
01330 } while (0)
01331 
01332 #define BM_DECODE_LEN(_n_) BM_DECODE_VINT(_n_)
01333 
01334 int
01335 bmz_bm_unpack(const void *src, size_t in_len, void *dst, size_t *out_len_p) {
01336   Byte *in = (Byte *)src, *out = (Byte *)dst;
01337   Byte *ip = in, *last_ip = ip + 1, *op = out, *cp;
01338   Byte *in_end = ip + in_len, *out_end = op + *out_len_p;
01339   int remains;
01340 
01341   if (*ip++ == 0) {
01342     memcpy(out, ip, --in_len);
01343     *out_len_p = in_len;
01344     return BMZ_E_OK;
01345   }
01346 
01347   while (ip < in_end) {
01348     if (*ip == BM_ESC) {
01349       UInt64 len = ip - last_ip, pos = 0;
01350 
01351       if (len) {
01352         BM_NEED_OUT(len);
01353         memcpy(op, last_ip, len);
01354         op += len;
01355         last_ip = ip;
01356       }
01357       BM_NEED_IN(1);
01358 
01359       if (ip[1] == BM_ESC) {
01360         ++last_ip;
01361         ip += 2;
01362       }
01363       else {
01364         ++ip;
01365         BM_DECODE_POS(op - out, pos);
01366         BM_DECODE_LEN(len);
01367         BM_NEED_OUT(len);
01368         last_ip = ip;
01369         cp = out + pos;
01370 
01371         while (len--) *op++ = *cp++;
01372       }
01373     }
01374     else ++ip;
01375   }
01376   remains = in_end - last_ip;
01377   BM_NEED_OUT(remains);
01378   memcpy(op, last_ip, remains);
01379   *out_len_p = op - out + remains;
01380 
01381   return BMZ_E_OK;
01382 
01383 input_overrun:
01384   *out_len_p = op - out;
01385   return BMZ_E_INPUT_OVERRUN;
01386 
01387 output_overrun:
01388   *out_len_p = op - out;
01389   return BMZ_E_OUTPUT_OVERRUN;
01390 }
01391 
01392 int
01393 bmz_bm_dump(const void *src, size_t in_len) {
01394   Byte *in = (Byte *)src, *ip = in, *in_end = ip + in_len;
01395   UInt64 pos = 0, len, cpos = 0, tot_pte_sz = 0, tot_ptr_sz = 0, nptrs = 0;
01396   int is_on = *ip++;
01397 
01398   printf(BM_COLOR_DBG "%d" BM_COLOR_END, is_on);
01399 
01400   if (!is_on) {
01401     fwrite(ip, 1, in_end - ip, stdout);
01402     return BMZ_E_OK;
01403   }
01404 
01405   while (ip < in_end) {
01406     if (*ip == BM_ESC) {
01407       Byte *ip0 = ip;
01408       BM_NEED_IN(1);
01409 
01410       if (ip[1] != BM_ESC) {
01411         ++ip;
01412         BM_DECODE_POS(cpos, pos);
01413         BM_DECODE_LEN(len);
01414         printf(BM_COLOR_DBG "<%llu,%llu>" BM_COLOR_END,
01415                (unsigned long long)pos, (unsigned long long)len);
01416         cpos += len;
01417         tot_pte_sz += len;
01418         tot_ptr_sz += ip - ip0;
01419         ++nptrs;
01420       }
01421       else {
01422         putchar(*ip++);
01423         putchar(*ip++);
01424         cpos += 2;
01425       }
01426     }
01427     else {
01428       putchar(*ip++);
01429       ++cpos;
01430     }
01431   }
01432   BM_LOG(1, "%llu pointers, avg pointee size: %.3f, avg pointer size: %.3f\n",
01433          (unsigned long long)nptrs, (double)tot_pte_sz / nptrs,
01434          (double)tot_ptr_sz / nptrs);
01435   return BMZ_E_OK;
01436 
01437 input_overrun:
01438   return BMZ_E_INPUT_OVERRUN;
01439 }
01440 
01441 int
01442 bmz_init() {
01443   int ret = lzo_init();
01444   return ret == LZO_E_OK ? BMZ_E_OK : BMZ_E_ERROR;
01445 }
01446 
01447 int
01448 bmz_lz_pack(const void *in, size_t in_len, void *out, size_t *out_len_p,
01449             void *work_mem) {
01450   lzo_uint olen = *out_len_p;
01451   int ret = lzo1x_1_compress((Byte *)in, in_len, (Byte *)out, &olen, work_mem);
01452   *out_len_p = olen;
01453   return ret;
01454 }
01455 
01456 size_t
01457 bmz_lz_pack_worklen(size_t in_len) {
01458   return LZO1X_MEM_COMPRESS;
01459 }
01460 
01461 int
01462 bmz_lz_unpack(const void *in, size_t in_len, void *out, size_t *out_len_p) {
01463   lzo_uint olen = *out_len_p;
01464   int ret = lzo1x_decompress((Byte *)in, in_len, (Byte *)out, &olen, NULL);
01465   *out_len_p = olen;
01466   return ret;
01467 }
01468 
01469 unsigned
01470 bmz_checksum(const void *in, size_t in_len) {
01471   return lzo_adler32(1, (Byte *)in, in_len);
01472 }
01473 
01474 unsigned
01475 bmz_update_checksum(unsigned s, const void *in, size_t in_len) {
01476   return lzo_adler32(s, (Byte *)in, in_len);
01477 }
01478 
01479 /* vim: et sw=2
01480  */