00001
00020 #include <stdio.h>
00021 #include <stdlib.h>
00022 #include <string.h>
00023 #include <sys/stat.h>
00024 #include <errno.h>
00025 #include <fcntl.h>
00026 #include <unistd.h>
00027 #include "bmz-internal.h"
00028 #include "test-helper.h"
00029 #ifndef HT_NO_MMAP
00030 #include <sys/mman.h>
00031 static size_t s_no_mmap = 0;
00032 #else
00033 static size_t s_no_mmap = 1;
00034 #endif
00035
00036
/* Cast target for printing size_t values with the "%lu"/"%lx" formats. */
typedef unsigned long Lu;

/*
 * Run _code_ _n_ times.  _n_ is evaluated once and copied into a local
 * counter, so the repeated code cannot change the iteration count.
 */
#define TIMES(_n_, _code_) do { \
    size_t _n = _n_; \
    for (; _n--; ) { _code_; } \
  } while (0)

/*
 * Time _times_ repetitions of _code_ with HT_MEASURE and print one line:
 * the label, the measured time and a throughput figure computed from _n_
 * bytes per repetition.  Both stdout and stderr are flushed afterwards.
 * NOTE(review): HT_MEASURE comes from test-helper.h; it presumably stores
 * the elapsed seconds in its first argument -- confirm there.
 */
#define BENCH(_label_, _code_, _n_, _times_) do { \
    double t1; \
    HT_MEASURE(t1, TIMES(_times_, _code_)); \
    printf("%16s: %.3fs (%.3fMB/s)\n", \
           _label_, t1, (_n_) / 1e6 / t1 *(_times_)); \
    fflush(stdout); \
    fflush(stderr); \
  } while (0)
00051
/*
 * Print a printf-style message to stderr, prefixed with the name of the
 * calling function and followed by a newline, when s_verbosity is at least
 * _lvl_.
 *
 * The test lives inside the do/while(0) so the macro is a single statement:
 * in `if (x) LOG(...); else ...` the `else` pairs with the caller's `if`
 * and not with the verbosity check.
 */
#define LOG(_lvl_, _fmt_, ...) do { \
    if (s_verbosity >= (_lvl_)) \
      fprintf(stderr, "%s: " _fmt_ "\n", __FUNCTION__, ##__VA_ARGS__); \
  } while (0)

/*
 * Report a fatal error through LOG at level 0 and terminate the process
 * with exit status 1.
 */
#define DIE(_fmt_, ...) do { \
    LOG(0, "fatal: " _fmt_, ##__VA_ARGS__); \
    exit(1); \
  } while (0)
00060
00061
/* Bit flags kept in s_options. */

/* Low four bits: which individual checks/benchmarks were requested. */
#define O_BENCH_HASH    0x001
#define O_BENCH_LUT     0x002
#define O_CHECK_HASH    0x004
#define O_MEMCPY        0x008

/* Bit 4 and up: which hash variants the tests are run with. */
#define O_HASH_MOD      0x010
#define O_HASH_MOD16X2  0x020
#define O_HASH_MASK16X2 0x040
#define O_HASH_MASK     0x080
#define O_HASH_MASK32X2 0x100

/* Everything except the rolling hash check. */
#define O_DEFAULT (0xffffffff & ~O_CHECK_HASH)

/* Every bit above the low four, i.e. all hash variant bits. */
#define O_HASHES 0xfffffff0
00073
00074
/* Settings filled in from the command line by main(). */
static size_t s_options = 0;    /* mask of O_* flags */
static size_t s_fp_len = 20;    /* --fp-len: bm fingerprint length */
static size_t s_offset = 0;     /* --offset: bm offset */
static size_t s_times = 1;      /* --times: repetitions per benchmark */

static int s_verbosity = 0;     /* --verbose: threshold used by LOG */
static int s_bm_dump = 0;       /* --bm-dump: have dump_bm() call bmz_bm_dump() */
static int s_show_hash = 0;     /* --hash: only print hash values */

/* Hash parameters (--b1, --b2, --m, --m1, --m2) passed to the bmz calls. */
static size_t s_b1 = 257;
static size_t s_b2 = 277;
static size_t s_m = 0xffffffff;
static size_t s_m1 = 0xffff;
static size_t s_m2 = 0xffff - 4;
00091
00092 static void
00093 dump_bm(const char *label, const char *in, size_t len) {
00094 int ret;
00095
00096 if (s_verbosity > 1) {
00097 printf("----%s encoded:\n", label);
00098 fwrite(in, 1, len, stdout);
00099 }
00100 if (s_bm_dump) {
00101 printf("\n----%s dumped:\n", label);
00102
00103 if ((ret = bmz_bm_dump(in, len)) != BMZ_E_OK)
00104 LOG(1, "error: bad encoded data (ret=%d)", ret);
00105
00106 puts("\n----end-dump");
00107 }
00108 }
00109
00110 static void
00111 test_bm_mod(const char *in, size_t len, char *out, size_t *len_p,
00112 void *work_mem) {
00113 bmz_bm_pack_mod(in, len, out, len_p, s_offset, s_fp_len, work_mem, s_b1, s_m);
00114 dump_bm("mod", out, *len_p);
00115 }
00116
00117 static void
00118 test_bm_mod16x2(const char *in, size_t len, char *out, size_t *len_p,
00119 void *work_mem) {
00120 bmz_bm_pack_mod16x2(in, len, out, len_p, s_offset, s_fp_len,
00121 work_mem, s_b1, s_b2, s_m1, s_m2);
00122 dump_bm("mod16x2", out, *len_p);
00123 }
00124
00125 static void
00126 test_bm_mask16x2(const char *in, size_t len, char *out, size_t *len_p,
00127 void *work_mem) {
00128 bmz_bm_pack_mask16x2(in, len, out, len_p, s_offset, s_fp_len,
00129 work_mem, s_b1, s_b2);
00130 dump_bm("mask16x2", out, *len_p);
00131 }
00132
00133 static void
00134 test_bm_mask(const char *in, size_t len, char *out, size_t *len_p,
00135 void *work_mem) {
00136 bmz_bm_pack_mask(in, len, out, len_p, s_offset, s_fp_len, work_mem, s_b1);
00137 dump_bm("mask", out, *len_p);
00138 }
00139
00140 static void
00141 test_bm_mask32x2(const char *in, size_t len, char *out, size_t *len_p,
00142 void *work_mem) {
00143 bmz_bm_pack_mask32x2(in, len, out, len_p, s_offset, s_fp_len,
00144 work_mem, s_b1, s_b2);
00145 dump_bm("mask32x2", out, *len_p);
00146 }
00147
00148 static void
00149 test_bm_unpack(const char *in, size_t len, char *out, size_t *len_p) {
00150 int ret = bmz_bm_unpack(in, len, out, len_p);
00151 LOG(1, "\nbm_unpack returned %d, size: %lu\n", ret, (Lu)*len_p);
00152 if (s_verbosity < 2) return;
00153 puts("bm decoded:");
00154 fwrite(out, 1, *len_p, stdout);
00155 puts("\nend-decoded");
00156 }
00157
/*
 * Read everything remaining on `fp` into a freshly allocated buffer.
 *
 * fp     stream to drain
 * len_p  receives the number of bytes read
 *
 * Returns the buffer, which the caller must free(); NULL (with *len_p == 0)
 * when the stream yielded no data.  Running out of memory or hitting a read
 * error is fatal: a message is printed to stderr and the process exits with
 * status 1, matching how the rest of this tool treats I/O failures.
 */
static char *
read_from_fp(FILE *fp, size_t *len_p) {
  char *data = NULL;
  char chunk[65536];
  size_t len = 0, size = 0, got;

  while ((got = fread(chunk, 1, sizeof(chunk), fp)) > 0) {
    if (len + got > size) {
      /* Grow by 1.5x of what is needed to keep the realloc count low. */
      size_t new_size = (len + got + 16) * 3 / 2;
      char *grown = realloc(data, new_size);

      if (!grown) {
        /* realloc left the old block untouched; release it before exiting. */
        free(data);
        fprintf(stderr, "read_from_fp: fatal: cannot allocate %lu bytes\n",
                (unsigned long)new_size);
        exit(1);
      }
      data = grown;
      size = new_size;
    }
    memcpy(data + len, chunk, got);
    len += got;
  }

  /* fread() returns 0 for both EOF and error; tell them apart. */
  if (ferror(fp)) {
    free(data);
    fprintf(stderr, "read_from_fp: fatal: read error after %lu bytes\n",
            (unsigned long)len);
    exit(1);
  }

  *len_p = len;
  return data;
}
00176
/*
 * Print one hash value in hexadecimal, with the label right-aligned in a
 * 16-character column.
 */
static void
print_hash(const char *label, size_t h) {
  const unsigned long value = (unsigned long)h;

  printf("%16s: %lx\n", label, value);
}
00181
00182 static void
00183 show_hash(const char *data, size_t len) {
00184
00185 if (s_options & O_HASH_MOD)
00186 print_hash("hash-mod", bmz_hash_mod(data, len, s_b1, s_m));
00187
00188 if (s_options & O_HASH_MOD16X2)
00189 print_hash("hash-mod16x2", bmz_hash_mod16x2(data, len, s_b1, s_b2,
00190 s_m1, s_m2));
00191 if (s_options & O_HASH_MASK16X2)
00192 print_hash("hash-mask16X2", bmz_hash_mask16x2(data, len, s_b1, s_b2));
00193
00194 if (s_options & O_HASH_MASK)
00195 print_hash("hash-mask", bmz_hash_mask(data, len, s_b1));
00196
00197 if (s_options & O_HASH_MASK32X2)
00198 print_hash("hash-mask32x2", bmz_hash_mask32x2(data, len, s_b1, s_b2));
00199 }
00200
00201 static void
00202 test_from_string(const char *data, size_t len) {
00203 char *buf, *mem;
00204 size_t n = s_times;
00205 int opt = s_options;
00206 size_t out_len, out_len0, len2 = len, work_len;
00207
00208 if (s_show_hash) {
00209 show_hash(data, len);
00210 return;
00211 }
00212
00213 out_len0 = out_len = bmz_pack_buflen(len);
00214 buf = malloc(out_len);
00215 work_len = bmz_bm_pack_worklen(len, s_fp_len);
00216 mem = malloc(work_len);
00217 LOG(1, "input length: %lu, out_len %lu, work_len: %lu\n",
00218 (Lu)len, (Lu)out_len, (Lu)work_len);
00219
00220
00221 if (opt & O_MEMCPY) {
00222 BENCH("memcpy", memcpy(buf, data, len), len, n);
00223 }
00224
00225 if (opt & O_CHECK_HASH) {
00226
00227 if (opt & O_HASH_MOD)
00228 HT_CHECK(bmz_check_hash_mod(data, len, s_fp_len, s_b1, s_m) == BMZ_E_OK);
00229
00230 if (opt & O_HASH_MOD16X2)
00231 HT_CHECK(bmz_check_hash_mod16x2(data, len, s_fp_len,
00232 s_b1, s_b2, s_m1, s_m2) == BMZ_E_OK);
00233 if (opt & O_HASH_MASK16X2)
00234 HT_CHECK(bmz_check_hash_mask16x2(data, len, s_fp_len, s_b1, s_b2)
00235 == BMZ_E_OK);
00236
00237 if (opt & O_HASH_MASK)
00238 HT_CHECK(bmz_check_hash_mask(data, len, s_fp_len, s_b1) == BMZ_E_OK);
00239
00240 if (opt & O_HASH_MASK32X2)
00241 HT_CHECK(bmz_check_hash_mask32x2(data, len, s_fp_len, s_b1, s_b2)
00242 == BMZ_E_OK);
00243 }
00244
00245 if (opt & O_BENCH_HASH) {
00246
00247 if (opt & O_HASH_MOD)
00248 BENCH("hash mod", bmz_bench_hash(data, len, BMZ_HASH_MOD), len, n);
00249
00250 if (opt & O_HASH_MOD16X2)
00251 BENCH("hash mod16x2", bmz_bench_hash(data, len, BMZ_HASH_MOD16X2),
00252 len, n);
00253
00254 if (opt & O_HASH_MASK16X2)
00255 BENCH("hash mask16x2", bmz_bench_hash(data, len, BMZ_HASH_MASK16X2),
00256 len, n);
00257
00258 if (opt & O_HASH_MASK)
00259 BENCH("hash mask", bmz_bench_hash(data, len, BMZ_HASH_MASK), len, n);
00260
00261 if (opt & O_HASH_MASK32X2)
00262 BENCH("hash mask32x2", bmz_bench_hash(data, len, BMZ_HASH_MASK32X2),
00263 len, n);
00264 }
00265
00266 if (opt & O_BENCH_LUT) {
00267
00268 if (opt & O_HASH_MOD)
00269 BENCH("lut mod", bmz_bench_lut_mod(data, len, s_fp_len, mem, s_b1, s_m),
00270 len, n);
00271
00272 if (opt & O_HASH_MOD16X2)
00273 BENCH("lut mod16x2", bmz_bench_lut_mod16x2(data, len, s_fp_len, mem,
00274 s_b1, s_b2, s_m1, s_m2), len, n);
00275
00276 if (opt & O_HASH_MASK16X2)
00277 BENCH("lut mask16x2", bmz_bench_lut_mask16x2(data, len, s_fp_len, mem,
00278 s_b1, s_b2), len, n);
00279
00280 if (opt & O_HASH_MASK)
00281 BENCH("lut mask", bmz_bench_lut_mask(data, len, s_fp_len, mem, s_b1),
00282 len, n);
00283
00284 if (opt & O_HASH_MASK32X2)
00285 BENCH("lut mask32x2", bmz_bench_lut_mask32x2(data, len, s_fp_len, mem,
00286 s_b1, s_b2), len, n);
00287 }
00288
00289 if (opt != O_DEFAULT && (opt & 0xf)) return;
00290
00291 if (opt & O_HASH_MOD) {
00292 BENCH("bm pack mod", test_bm_mod(data, len, buf, &out_len, mem), len, n);
00293 BENCH("bm unpack", test_bm_unpack(buf, out_len, buf + out_len, &len2),
00294 len, n);
00295 HT_CHECK(len == len2);
00296 HT_CHECK(memcmp(data, buf + out_len, len) == 0);
00297 }
00298
00299 if (opt & O_HASH_MOD16X2) {
00300 memset(buf, 0, out_len); out_len = out_len0;
00301 BENCH("bm pack mod16x2", test_bm_mod16x2(data, len, buf, &out_len, mem),
00302 len, n);
00303 BENCH("bm unpack", test_bm_unpack(buf, out_len, buf + out_len, &len2),
00304 len, n);
00305 HT_CHECK(len == len2);
00306 HT_CHECK(memcmp(data, buf + out_len, len) == 0);
00307 }
00308
00309 if (opt & O_HASH_MASK16X2) {
00310 memset(buf, 0, out_len); out_len = out_len0;
00311 BENCH("bm pack mask16x2", test_bm_mask16x2(data, len, buf, &out_len, mem),
00312 len, n);
00313 BENCH("bm unpack", test_bm_unpack(buf, out_len, buf + out_len, &len2),
00314 len, n);
00315 HT_CHECK(len == len2);
00316 HT_CHECK(memcmp(data, buf + out_len, len) == 0);
00317 }
00318
00319 if (opt & O_HASH_MASK) {
00320 memset(buf, 0, out_len); out_len = out_len0;
00321 BENCH("bm pack mask", test_bm_mask(data, len, buf, &out_len, mem), len, n);
00322 BENCH("bm unpack", test_bm_unpack(buf, out_len, buf + out_len, &len2),
00323 len, n);
00324 HT_CHECK(len == len2);
00325 HT_CHECK(memcmp(data, buf + out_len, len) == 0);
00326 }
00327
00328 if (opt & O_HASH_MASK32X2) {
00329 memset(buf, 0, out_len); out_len = out_len0;
00330 BENCH("bm pack mask32x2", test_bm_mask32x2(data, len, buf, &out_len, mem),
00331 len, n);
00332 BENCH("bm unpack", test_bm_unpack(buf, out_len, buf + out_len, &len2),
00333 len, n);
00334 HT_CHECK(len == len2);
00335 HT_CHECK(memcmp(data, buf + out_len, len) == 0);
00336 }
00337 }
00338
/*
 * Read all of standard input and run the tests on it.
 *
 * read_from_fp() returns NULL when stdin is empty; an empty string is
 * substituted in that case so the tests never receive a NULL pointer.
 * The input buffer is released once the tests are done.
 */
static void
test_from_stdin(void) {
  size_t len = 0;
  char *data = read_from_fp(stdin, &len);

  test_from_string(data ? data : "", len);
  free(data);
}
00345
00346 static void
00347 test_from_file(const char *fname) {
00348 int fd = open(fname, O_RDONLY, 0);
00349 char *data = NULL;
00350 struct stat st;
00351 long len;
00352
00353 if (fd == -1) DIE("cannot open '%s'", fname);
00354
00355 if (fstat(fd, &st) != 0) DIE("stat failed on '%s'", fname);
00356
00357 len = st.st_size;
00358
00359 if (!s_no_mmap) {
00360 #ifndef HT_NO_MMAP
00361 LOG(1, "mmaping %ld bytes in to memory...", len);
00362 data = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
00363
00364 if ((char *)-1 == data) {
00365 LOG(0, "mmap failed on '%s', trying alternative...", fname);
00366 errno = 0;
00367 data = NULL;
00368 }
00369 #endif
00370 }
00371
00372 if (!data) {
00373 LOG(1, "reading %ld bytes in to memory...", len);
00374
00375 data = malloc(len);
00376
00377 if (!data) DIE("error alloc'ing %ld bytes", len);
00378
00379 if ((len = read(fd, data, len)) != st.st_size)
00380 DIE("error reading %s (expecting %ld bytes, got %ld",
00381 fname, (long)st.st_size, len);
00382 }
00383 else {
00384 }
00385
00386 test_from_string(data, st.st_size);
00387 }
00388
/*
 * Print the command line help to stdout and exit with status 0.
 * The text is split into three literals passed through "%s%s%s" to keep
 * each string literal short.
 */
static void
show_usage(void) {
  printf("usage: bmz-test [options] [string...]\n%s%s%s",
         "--help               show this help\n"
         "--verbose[=level]    set verbose level\n"
         "--file <file>        use <file> as input\n"
         "--no-mmap            read the input file instead of mmap()ing it\n"
         "--offset <number>    set bm offset\n"
         "--fp-len <number>    set bm fingerprint length\n"
         "--hash               compute hash value only\n"
         "--b1 <number>        hash param b1\n"
         "--b2 <number>        hash param b2\n",
         "--m <number>         hash param m\n"
         "--m1 <number>        hash param m1\n"
         "--m2 <number>        hash param m2\n"
         "--hash-mod           use hash-mod\n"
         "--hash-mod16x2       use hash-mod16x2\n"
         "--hash-mask16x2      use hash-mask16x2\n"
         "--hash-mask          use hash-mask\n"
         "--hash-mask32x2      use hash-mask32x2\n",
         "--bm-dump            dump the bm encoded output\n"
         "--bench-hash         benchmarks for builtin hashes\n"
         "--check-hash         verify rolling hashes\n"
         "--bench-lut          benchmarks for lookup table\n"
         "--times <number>     number of repeats for the test\n"
         );
  exit(0);
}
00415
00416 int
00417 main(int ac, char *av[]) {
00418 char **ia = av + 1, **a_end = av + ac, *ep;
00419 const char *fname = NULL;
00420
00421 for (; ia < a_end; ++ia) {
00422 if (!strcmp("--fp-len", *ia)) s_fp_len = atoi(*++ia);
00423 else if (!strcmp("--offset", *ia)) s_offset = atoi(*++ia);
00424 else if (!strcmp("--times", *ia)) s_times = atoi(*++ia);
00425 else if (!strcmp("--hash", *ia)) s_show_hash = 1;
00426 else if (!strcmp("--b1", *ia)) s_b1 = atoi(*++ia);
00427 else if (!strcmp("--b2", *ia)) s_b2 = atoi(*++ia);
00428 else if (!strcmp("--m", *ia)) s_m = strtol(*++ia, &ep, 0);
00429 else if (!strcmp("--m1", *ia)) s_m1 = strtol(*++ia, &ep, 0);
00430 else if (!strcmp("--m2", *ia)) s_m2 = strtol(*++ia, &ep, 0);
00431 else if (!strcmp("--file", *ia)) fname = *++ia;
00432 else if (!strcmp("--no-mmap", *ia)) s_no_mmap = 1;
00433 else if (!strcmp("--verbose", *ia)) s_verbosity = 1;
00434 else if (!strncmp("--verbose=", *ia, 10)) s_verbosity = atoi(*ia + 10);
00435 else if (!strcmp("--bm-dump", *ia)) s_bm_dump = 1;
00436 else if (!strcmp("--bench-hash", *ia)) s_options |= O_BENCH_HASH;
00437 else if (!strcmp("--check-hash", *ia)) s_options |= O_CHECK_HASH;
00438 else if (!strcmp("--bench-lut", *ia)) s_options |= O_BENCH_LUT;
00439 else if (!strcmp("--hash-mod", *ia)) s_options |= O_HASH_MOD;
00440 else if (!strcmp("--hash-mod16x2", *ia)) s_options |= O_HASH_MOD16X2;
00441 else if (!strcmp("--hash-mask16x2", *ia)) s_options |= O_HASH_MASK16X2;
00442 else if (!strcmp("--hash-mask", *ia)) s_options |= O_HASH_MASK;
00443 else if (!strcmp("--hash-mask32x2", *ia)) s_options |= O_HASH_MASK32X2;
00444 else if (!strcmp("--help", *ia)) show_usage();
00445 else if (!strcmp("--", *ia)) {
00446 ++ia;
00447 break;
00448 }
00449 else if ('-' == **ia) {
00450 DIE("unknown option: %s\n", *ia);
00451 }
00452 else break;
00453 }
00454 bmz_set_verbosity(s_verbosity);
00455
00456 if (!s_options) s_options = O_DEFAULT;
00457 else if (!(s_options & O_HASHES)) s_options |= O_HASHES;
00458
00459 if (fname)
00460 test_from_file(fname);
00461 else if (ia >= a_end)
00462 test_from_stdin();
00463 else for (; ia < a_end; ++ia)
00464 test_from_string(*ia, strlen(*ia));
00465
00466 return 0;
00467 }
00468
00469
00470