bmz-test.c

Go to the documentation of this file.
00001 
00020 #include <stdio.h>
00021 #include <stdlib.h>
00022 #include <string.h>
00023 #include <sys/stat.h>
00024 #include <errno.h>
00025 #include <fcntl.h>
00026 #include <unistd.h>
00027 #include "bmz-internal.h"
00028 #include "test-helper.h"
/* s_no_mmap is nonzero when file input must be read into a malloc'ed buffer
 * rather than mmap'ed.  Builds with HT_NO_MMAP defined start with it set (and
 * never pull in <sys/mman.h>); other builds start with it clear. */
#ifdef HT_NO_MMAP
static size_t s_no_mmap = 1;
#else
#include <sys/mman.h>
static size_t s_no_mmap = 0;
#endif
00035 
/* Short alias used to cast size_t values for "%lu"/"%lx" format strings,
 * which silences format warnings. */
typedef unsigned long Lu;
00038 
/* Execute _code_ exactly _n_ times (_n_ is evaluated once, as a size_t). */
#define TIMES(_n_, _code_) do { \
  size_t _n = _n_; \
  for (; _n-- != 0; ) { \
    _code_; \
  } \
} while (0)
00043 
/* Time _times_ repetitions of _code_ and print one report line:
 * the label, the measured time and a throughput figure computed from
 * _n_ (bytes handled per repetition) and _times_.
 * HT_MEASURE is defined outside this file; it is assumed to store the
 * elapsed time in seconds into its first argument -- TODO confirm. */
#define BENCH(_label_, _code_, _n_, _times_) do { \
  double bench_secs; \
  HT_MEASURE(bench_secs, TIMES(_times_, _code_)); \
  printf("%16s: %.3fs (%.3fMB/s)\n", \
         _label_, bench_secs, (_n_) / 1e6 / bench_secs *(_times_)); \
  fflush(stdout); \
  fflush(stderr); \
} while (0)
00051 
/* Print a printf-style message to stderr, prefixed with the calling
 * function's name and terminated with a newline, when s_verbosity is at
 * least _lvl_.
 * The whole expansion is a single do { } while (0) statement so the macro
 * can be used as the body of an if/else without capturing the else. */
#define LOG(_lvl_, _fmt_, ...) do { \
  if (s_verbosity >= (_lvl_)) \
    fprintf(stderr, "%s: " _fmt_ "\n", __FUNCTION__, ##__VA_ARGS__); \
} while (0)
00055 
/* Report a fatal error through LOG at level 0 (the message is prefixed with
 * "fatal: ") and terminate the process with exit status 1.  The exit happens
 * even when the verbosity setting suppresses the message. */
#define DIE(_fmt_, ...) \
  do { \
    LOG(0, "fatal: " _fmt_, ##__VA_ARGS__); \
    exit(1); \
  } while (0)
00060 
/* options: bit flags collected in s_options */

/* bits 0-3: which extra stages to run */
#define O_BENCH_HASH    0x001
#define O_BENCH_LUT     0x002
#define O_CHECK_HASH    0x004
#define O_MEMCPY        0x008

/* bits 4 and up: which hash variants to exercise */
#define O_HASH_MOD      0x010
#define O_HASH_MOD16X2  0x020
#define O_HASH_MASK16X2 0x040
#define O_HASH_MASK     0x080
#define O_HASH_MASK32X2 0x100

/* everything except the hash check */
#define O_DEFAULT       (0xffffffff & ~O_CHECK_HASH)
/* every bit above the four stage bits */
#define O_HASHES        0xfffffff0
00073 
/* defaults (each can be overridden from the command line in main) */
static size_t s_options = 0;      /* O_* flags; 0 means "nothing chosen yet" */
static size_t s_fp_len = 20;      /* --fp-len */
static size_t s_offset = 0;       /* --offset */
static size_t s_times = 1;        /* --times: repetitions per benchmark */
static int s_verbosity = 0;       /* --verbose[=level] */
static int s_bm_dump = 0;         /* --bm-dump */
static int s_show_hash = 0;       /* --hash */

/* Hash parameters.
 * From Andrew Tridgell's thesis p72-73:
 * D1D3: b1=3, b2=7, m1=0xffff m2=(0xffff - 4)
 * D3D4: b1=7, b2=17, m1=(0xffff - 4), m2=(0xffff - 6)
 */
static size_t s_b1 = 257;         /* --b1 */
static size_t s_b2 = 277;         /* --b2 */
static size_t s_m = 0xffffffff;   /* --m */
static size_t s_m1 = 0xffff;      /* --m1 */
static size_t s_m2 = 0xffff - 4;  /* --m2 */
00091 
00092 static void
00093 dump_bm(const char *label, const char *in, size_t len) {
00094   int ret;
00095 
00096   if (s_verbosity > 1) {
00097     printf("----%s encoded:\n", label);
00098     fwrite(in, 1, len, stdout);
00099   }
00100   if (s_bm_dump) {
00101     printf("\n----%s dumped:\n", label);
00102 
00103     if ((ret = bmz_bm_dump(in, len)) != BMZ_E_OK)
00104       LOG(1, "error: bad encoded data (ret=%d)", ret);
00105 
00106     puts("\n----end-dump");
00107   }
00108 }
00109 
00110 static void
00111 test_bm_mod(const char *in, size_t len, char *out, size_t *len_p,
00112             void *work_mem) {
00113   bmz_bm_pack_mod(in, len, out, len_p, s_offset, s_fp_len, work_mem, s_b1, s_m);
00114   dump_bm("mod", out, *len_p);
00115 }
00116 
00117 static void
00118 test_bm_mod16x2(const char *in, size_t len, char *out, size_t *len_p,
00119              void *work_mem) {
00120   bmz_bm_pack_mod16x2(in, len, out, len_p, s_offset, s_fp_len,
00121                       work_mem, s_b1, s_b2, s_m1, s_m2);
00122   dump_bm("mod16x2", out, *len_p);
00123 }
00124 
00125 static void
00126 test_bm_mask16x2(const char *in, size_t len, char *out, size_t *len_p,
00127                  void *work_mem) {
00128   bmz_bm_pack_mask16x2(in, len, out, len_p, s_offset, s_fp_len,
00129                        work_mem, s_b1, s_b2);
00130   dump_bm("mask16x2", out, *len_p);
00131 }
00132 
00133 static void
00134 test_bm_mask(const char *in, size_t len, char *out, size_t *len_p,
00135              void *work_mem) {
00136   bmz_bm_pack_mask(in, len, out, len_p, s_offset, s_fp_len, work_mem, s_b1);
00137   dump_bm("mask", out, *len_p);
00138 }
00139 
00140 static void
00141 test_bm_mask32x2(const char *in, size_t len, char *out, size_t *len_p,
00142                  void *work_mem) {
00143   bmz_bm_pack_mask32x2(in, len, out, len_p, s_offset, s_fp_len,
00144                        work_mem, s_b1, s_b2);
00145   dump_bm("mask32x2", out, *len_p);
00146 }
00147 
00148 static void
00149 test_bm_unpack(const char *in, size_t len, char *out, size_t *len_p) {
00150   int ret = bmz_bm_unpack(in, len, out, len_p);
00151   LOG(1, "\nbm_unpack returned %d, size: %lu\n", ret, (Lu)*len_p);
00152   if (s_verbosity < 2) return;
00153   puts("bm decoded:");
00154   fwrite(out, 1, *len_p, stdout);
00155   puts("\nend-decoded");
00156 }
00157 
/*
 * Read everything that remains on `fp` into a freshly allocated buffer.
 *
 * fp     stream to drain
 * len_p  receives the number of bytes read
 *
 * Returns a malloc-family buffer holding the data; the caller owns it and
 * releases it with free().  The buffer is never NULL, even for empty input,
 * so it can be handed to memcpy-style functions with a zero length.
 * Allocation failures and stream read errors are fatal: a message is written
 * to stderr and the process exits with status 1.
 */
static char *
read_from_fp(FILE *fp, size_t *len_p) {
  char *data = NULL;
  char buf[65536];
  size_t len = 0, size = 0, ret;

  do {
    ret = fread(buf, 1, sizeof(buf), fp);

    /* grow on demand; the !data test forces one allocation for empty input */
    if (!data || len + ret > size) {
      size_t new_size = (len + ret + 16) * 3 / 2;
      /* keep the old pointer until realloc is known to have succeeded */
      char *grown = realloc(data, new_size);

      if (!grown) {
        fprintf(stderr, "%s: fatal: cannot allocate %lu bytes\n",
                __func__, (unsigned long)new_size);
        free(data);
        exit(1);
      }
      data = grown;
      size = new_size;
    }
    memcpy(data + len, buf, ret);
    len += ret;
  } while (ret > 0);

  /* fread returns 0 for both EOF and error; tell them apart here */
  if (ferror(fp)) {
    fprintf(stderr, "%s: fatal: read error after %lu bytes\n",
            __func__, (unsigned long)len);
    free(data);
    exit(1);
  }
  *len_p = len;
  return data;
}
00176 
00177 static void
00178 print_hash(const char *label, size_t h) {
00179   printf("%16s: %lx\n", label, (Lu)h);
00180 }
00181 
00182 static void
00183 show_hash(const char *data, size_t len) {
00184 
00185   if (s_options & O_HASH_MOD)
00186     print_hash("hash-mod", bmz_hash_mod(data, len, s_b1, s_m));
00187 
00188   if (s_options & O_HASH_MOD16X2)
00189     print_hash("hash-mod16x2", bmz_hash_mod16x2(data, len, s_b1, s_b2,
00190                                                 s_m1, s_m2));
00191   if (s_options & O_HASH_MASK16X2)
00192     print_hash("hash-mask16X2", bmz_hash_mask16x2(data, len, s_b1, s_b2));
00193 
00194   if (s_options & O_HASH_MASK)
00195     print_hash("hash-mask", bmz_hash_mask(data, len, s_b1));
00196 
00197   if (s_options & O_HASH_MASK32X2)
00198     print_hash("hash-mask32x2", bmz_hash_mask32x2(data, len, s_b1, s_b2));
00199 }
00200 
00201 static void
00202 test_from_string(const char *data, size_t len) {
00203   char *buf, *mem;
00204   size_t n = s_times;
00205   int opt = s_options;
00206   size_t out_len, out_len0, len2 = len, work_len;
00207 
00208   if (s_show_hash) {
00209     show_hash(data, len);
00210     return;
00211   }
00212 
00213   out_len0 = out_len = bmz_pack_buflen(len);
00214   buf = malloc(out_len);
00215   work_len = bmz_bm_pack_worklen(len, s_fp_len);
00216   mem = malloc(work_len);
00217   LOG(1, "input length: %lu, out_len %lu, work_len: %lu\n",
00218       (Lu)len, (Lu)out_len, (Lu)work_len);
00219 
00220   /* memcpy/memmove for comparison */
00221   if (opt & O_MEMCPY) {
00222     BENCH("memcpy", memcpy(buf, data, len), len, n);
00223   }
00224 
00225   if (opt & O_CHECK_HASH) {
00226 
00227     if (opt & O_HASH_MOD)
00228       HT_CHECK(bmz_check_hash_mod(data, len, s_fp_len, s_b1, s_m) == BMZ_E_OK);
00229 
00230     if (opt & O_HASH_MOD16X2)
00231       HT_CHECK(bmz_check_hash_mod16x2(data, len, s_fp_len,
00232                                       s_b1, s_b2, s_m1, s_m2) == BMZ_E_OK);
00233     if (opt & O_HASH_MASK16X2)
00234       HT_CHECK(bmz_check_hash_mask16x2(data, len, s_fp_len, s_b1, s_b2)
00235                == BMZ_E_OK);
00236 
00237     if (opt & O_HASH_MASK)
00238       HT_CHECK(bmz_check_hash_mask(data, len, s_fp_len, s_b1) == BMZ_E_OK);
00239 
00240     if (opt & O_HASH_MASK32X2)
00241       HT_CHECK(bmz_check_hash_mask32x2(data, len, s_fp_len, s_b1, s_b2)
00242                == BMZ_E_OK);
00243   }
00244 
00245   if (opt & O_BENCH_HASH) {
00246 
00247     if (opt & O_HASH_MOD)
00248       BENCH("hash mod", bmz_bench_hash(data, len, BMZ_HASH_MOD), len, n);
00249 
00250     if (opt & O_HASH_MOD16X2)
00251       BENCH("hash mod16x2", bmz_bench_hash(data, len, BMZ_HASH_MOD16X2),
00252             len, n);
00253 
00254     if (opt & O_HASH_MASK16X2)
00255       BENCH("hash mask16x2", bmz_bench_hash(data, len, BMZ_HASH_MASK16X2),
00256             len, n);
00257 
00258     if (opt & O_HASH_MASK)
00259       BENCH("hash mask", bmz_bench_hash(data, len, BMZ_HASH_MASK), len, n);
00260 
00261     if (opt & O_HASH_MASK32X2)
00262       BENCH("hash mask32x2", bmz_bench_hash(data, len, BMZ_HASH_MASK32X2),
00263             len, n);
00264   }
00265 
00266   if (opt & O_BENCH_LUT) {
00267 
00268     if (opt & O_HASH_MOD)
00269       BENCH("lut mod", bmz_bench_lut_mod(data, len, s_fp_len, mem, s_b1, s_m),
00270             len, n);
00271 
00272     if (opt & O_HASH_MOD16X2)
00273       BENCH("lut mod16x2", bmz_bench_lut_mod16x2(data, len, s_fp_len, mem,
00274             s_b1, s_b2, s_m1, s_m2), len, n);
00275 
00276     if (opt & O_HASH_MASK16X2)
00277       BENCH("lut mask16x2", bmz_bench_lut_mask16x2(data, len, s_fp_len, mem,
00278             s_b1, s_b2), len, n);
00279 
00280     if (opt & O_HASH_MASK)
00281       BENCH("lut mask", bmz_bench_lut_mask(data, len, s_fp_len, mem, s_b1),
00282             len, n);
00283 
00284     if (opt & O_HASH_MASK32X2)
00285       BENCH("lut mask32x2", bmz_bench_lut_mask32x2(data, len, s_fp_len, mem,
00286             s_b1, s_b2), len, n);
00287   }
00288 
00289   if (opt != O_DEFAULT && (opt & 0xf)) return;
00290 
00291   if (opt & O_HASH_MOD) {
00292     BENCH("bm pack mod", test_bm_mod(data, len, buf, &out_len, mem), len, n);
00293     BENCH("bm unpack", test_bm_unpack(buf, out_len, buf + out_len, &len2),
00294           len, n);
00295     HT_CHECK(len == len2);
00296     HT_CHECK(memcmp(data, buf + out_len, len) == 0);
00297   }
00298 
00299   if (opt & O_HASH_MOD16X2) {
00300     memset(buf, 0, out_len); out_len = out_len0;
00301     BENCH("bm pack mod16x2", test_bm_mod16x2(data, len, buf, &out_len, mem),
00302           len, n);
00303     BENCH("bm unpack", test_bm_unpack(buf, out_len, buf + out_len, &len2),
00304           len, n);
00305     HT_CHECK(len == len2);
00306     HT_CHECK(memcmp(data, buf + out_len, len) == 0);
00307   }
00308 
00309   if (opt & O_HASH_MASK16X2) {
00310     memset(buf, 0, out_len); out_len = out_len0;
00311     BENCH("bm pack mask16x2", test_bm_mask16x2(data, len, buf, &out_len, mem),
00312           len, n);
00313     BENCH("bm unpack", test_bm_unpack(buf, out_len, buf + out_len, &len2),
00314           len, n);
00315     HT_CHECK(len == len2);
00316     HT_CHECK(memcmp(data, buf + out_len, len) == 0);
00317   }
00318 
00319   if (opt & O_HASH_MASK) {
00320     memset(buf, 0, out_len); out_len = out_len0;
00321     BENCH("bm pack mask", test_bm_mask(data, len, buf, &out_len, mem), len, n);
00322     BENCH("bm unpack", test_bm_unpack(buf, out_len, buf + out_len, &len2),
00323           len, n);
00324     HT_CHECK(len == len2);
00325     HT_CHECK(memcmp(data, buf + out_len, len) == 0);
00326   }
00327 
00328   if (opt & O_HASH_MASK32X2) {
00329     memset(buf, 0, out_len); out_len = out_len0;
00330     BENCH("bm pack mask32x2", test_bm_mask32x2(data, len, buf, &out_len, mem),
00331           len, n);
00332     BENCH("bm unpack", test_bm_unpack(buf, out_len, buf + out_len, &len2),
00333           len, n);
00334     HT_CHECK(len == len2);
00335     HT_CHECK(memcmp(data, buf + out_len, len) == 0);
00336   }
00337 }
00338 
/* Read all of standard input into memory, run the tests on it and release
 * the buffer again. */
static void
test_from_stdin(void) {
  size_t len = 0;
  char *data = read_from_fp(stdin, &len);

  test_from_string(data, len);
  free(data);
}
00345 
00346 static void
00347 test_from_file(const char *fname) {
00348   int fd = open(fname, O_RDONLY, 0);
00349   char *data = NULL;
00350   struct stat st;
00351   long len;
00352 
00353   if (fd == -1) DIE("cannot open '%s'", fname);
00354 
00355   if (fstat(fd, &st) != 0) DIE("stat failed on '%s'", fname);
00356 
00357   len = st.st_size;
00358 
00359   if (!s_no_mmap) {
00360 #ifndef HT_NO_MMAP
00361     LOG(1, "mmaping %ld bytes in to memory...", len);
00362     data = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
00363 
00364     if ((char *)-1 == data) {
00365       LOG(0, "mmap failed on '%s', trying alternative...", fname);
00366       errno = 0;
00367       data = NULL;
00368     }
00369 #endif
00370   }
00371 
00372   if (!data) {
00373     LOG(1, "reading %ld bytes in to memory...", len);
00374 
00375     data = malloc(len);
00376 
00377     if (!data) DIE("error alloc'ing %ld bytes", len);
00378 
00379     if ((len = read(fd, data, len)) != st.st_size)
00380       DIE("error reading %s (expecting %ld bytes, got %ld",
00381           fname, (long)st.st_size, len);
00382   }
00383   else {
00384   }
00385 
00386   test_from_string(data, st.st_size);
00387 }
00388 
/*
 * Print the command line help to stdout and exit with status 0.
 *
 * The option list is passed as several separate string arguments so that no
 * single string literal grows beyond the small minimum length that older C
 * compilers are required to accept.
 */
static void
show_usage(void) {
  printf("usage: bmz-test [options] [string...]\n%s%s%s%s",
         "--help                        show this help\n"
         "--verbose[=level]             set verbose level\n"
         "--file        <file>          use <file> as input\n"
         "--no-mmap                     read <file> instead of mmap'ing it\n"
         "--offset      <number>        set bm offset\n"
         "--fp-len      <number>        set bm fingerprint length\n"
         "--bm-dump                     dump the bm encoded output\n",
         "--hash                        compute hash value only\n"
         "--b1          <number>        hash param b1\n"
         "--b2          <number>        hash param b2\n"
         "--m           <number>        hash param m\n"
         "--m1          <number>        hash param m1\n"
         "--m2          <number>        hash param m2\n",
         "--hash-mod                    use hash-mod\n"
         "--hash-mod16x2                use hash-mod16x2\n"
         "--hash-mask16x2               use hash-mask16x2\n"
         "--hash-mask                   use hash-mask\n"
         "--hash-mask32x2               use hash-mask32x2\n",
         "--bench-hash                  benchmarks for builtin hashes\n"
         "--check-hash                  verify rolling hashes\n"
         "--bench-lut                   benchmarks for lookup table\n"
         "--times       <number>        number of repeats for the test\n"
);
  exit(0);
}
00415 
00416 int
00417 main(int ac, char *av[]) {
00418   char **ia = av + 1, **a_end = av + ac, *ep;
00419   const char *fname = NULL;
00420 
00421   for (; ia < a_end; ++ia) {
00422     if (!strcmp("--fp-len", *ia))               s_fp_len = atoi(*++ia);
00423     else if (!strcmp("--offset", *ia))          s_offset = atoi(*++ia);
00424     else if (!strcmp("--times", *ia))           s_times = atoi(*++ia);
00425     else if (!strcmp("--hash", *ia))            s_show_hash = 1;
00426     else if (!strcmp("--b1", *ia))              s_b1 = atoi(*++ia);
00427     else if (!strcmp("--b2", *ia))              s_b2 = atoi(*++ia);
00428     else if (!strcmp("--m", *ia))               s_m = strtol(*++ia, &ep, 0);
00429     else if (!strcmp("--m1", *ia))              s_m1 = strtol(*++ia, &ep, 0);
00430     else if (!strcmp("--m2", *ia))              s_m2 = strtol(*++ia, &ep, 0);
00431     else if (!strcmp("--file", *ia))            fname = *++ia;
00432     else if (!strcmp("--no-mmap", *ia))         s_no_mmap = 1;
00433     else if (!strcmp("--verbose", *ia))         s_verbosity = 1;
00434     else if (!strncmp("--verbose=", *ia, 10))   s_verbosity = atoi(*ia + 10);
00435     else if (!strcmp("--bm-dump", *ia))         s_bm_dump = 1;
00436     else if (!strcmp("--bench-hash", *ia))      s_options |= O_BENCH_HASH;
00437     else if (!strcmp("--check-hash", *ia))      s_options |= O_CHECK_HASH;
00438     else if (!strcmp("--bench-lut", *ia))       s_options |= O_BENCH_LUT;
00439     else if (!strcmp("--hash-mod", *ia))        s_options |= O_HASH_MOD;
00440     else if (!strcmp("--hash-mod16x2", *ia))    s_options |= O_HASH_MOD16X2;
00441     else if (!strcmp("--hash-mask16x2", *ia))   s_options |= O_HASH_MASK16X2;
00442     else if (!strcmp("--hash-mask", *ia))       s_options |= O_HASH_MASK;
00443     else if (!strcmp("--hash-mask32x2", *ia))   s_options |= O_HASH_MASK32X2;
00444     else if (!strcmp("--help", *ia))            show_usage();
00445     else if (!strcmp("--", *ia)) {
00446       ++ia;
00447       break;
00448     }
00449     else if ('-' == **ia) {
00450       DIE("unknown option: %s\n", *ia);
00451     }
00452     else break;
00453   }
00454   bmz_set_verbosity(s_verbosity);
00455 
00456   if (!s_options) s_options = O_DEFAULT;
00457   else if (!(s_options & O_HASHES)) s_options |= O_HASHES;
00458 
00459   if (fname)
00460     test_from_file(fname);
00461   else if (ia >= a_end)
00462     test_from_stdin();
00463   else for (; ia < a_end; ++ia)
00464     test_from_string(*ia, strlen(*ia));
00465 
00466   return 0;
00467 }
00468 
00469 /* vim: et sw=2
00470  */

Generated on Sat Aug 15 08:52:18 2009 for hypertable by  doxygen 1.5.9