00001
00022 #include "Common/compat-c.h"
00023 #include <stdio.h>
00024 #include <stdlib.h>
00025 #include <string.h>
00026 #include <unistd.h>
00027 #include <limits.h>
00028 #include <stdint.h>
00029 #include <stdarg.h>
00030 #include <errno.h>
00031 #include <fcntl.h>
00032 #include <sys/stat.h>
00033 #ifndef HT_NO_MMAP
00034 # include <sys/mman.h>
00035 static int s_no_mmap = 0;
00036 #else
00037 static int s_no_mmap = 1;
00038 #endif
00039
00040 #include "bmz-internal.h"
00041
/* Magic bytes at the start of every bmz file, and the newest format version
 * this tool writes and accepts. */
#define BMZ_MAGIC "BMZ"
#define BMZIP_VER 0x0110

/*
 * Size of the on-disk header: magic, 2-byte version, 1-byte options,
 * 6-byte original size, 4-byte checksum.
 *
 * sizeof (rather than strlen) keeps this an integer constant expression, so
 * header buffers declared as `buf[BMZ_HEADER_SZ]` are ordinary fixed-size
 * arrays instead of variable-length arrays.
 */
#define BMZ_HEADER_SZ (sizeof(BMZ_MAGIC) - 1 + 2 + 1 + 6 + 4)

/* Actions selectable from the command line */
#define BMZ_A_PACK 0
#define BMZ_A_UNPACK 1
#define BMZ_A_LIST 2

/* Bits of the header's options byte (BMZ_O_STREAM is not referenced in the
 * code visible here) */
#define BMZ_O_BM_ONLY 1
#define BMZ_O_STREAM 2

typedef unsigned char Byte;

/* Shorthands used when casting values for printf-style formats */
typedef long long unsigned Llu;
typedef long unsigned Lu;

/* Diagnostic level compared against by LOG(); raised by --verbose */
static int s_verbosity = 0;
/* Set by --bm-dump: dump the bm encoding instead of writing a bmz file */
static int s_bm_dump = 0;
/* Hash selector set by --bm-hash; do_pack passes it on as (s_bm_hash << 24) */
static int s_bm_hash = 0;
00062
/*
 * LOG(level, fmt, ...): when s_verbosity >= level, print
 * "bmzip: <function>: <message>" to stderr, followed by ": <strerror(errno)>"
 * if errno is non-zero, then a newline.
 *
 * The whole expansion is a single do { } while (0) statement so that
 * `if (x) LOG(...); else ...` binds the else to the caller's if rather than
 * to the verbosity test inside the macro.
 */
#define LOG(_lvl_, _fmt_, ...) do { \
  if (s_verbosity >= (_lvl_)) { \
    fprintf(stderr, "bmzip: %s: " _fmt_, __FUNCTION__, ##__VA_ARGS__); \
    if (errno) fprintf(stderr, ": %s", strerror(errno)); \
    putc('\n', stderr); \
  } \
} while (0)

/* WARN(fmt, ...): level-0 diagnostic prefixed with "warning: ". */
#define WARN(_fmt_, ...) do { \
  LOG(0, "warning: " _fmt_, ##__VA_ARGS__); \
} while (0)

/* DIE(fmt, ...): level-0 diagnostic prefixed with "fatal: ", then exit(1). */
#define DIE(_fmt_, ...) do { \
  LOG(0, "fatal: " _fmt_, ##__VA_ARGS__); \
  exit(1); \
} while (0)
00077
/*
 * Advance _mem_ to the next address that is a multiple of _n_, returned as a
 * Byte pointer.  An address that is already aligned is still advanced by a
 * full _n_ bytes.  The expansion and _n_ are parenthesized so the macro can be
 * used inside larger expressions and with compound arguments.
 */
#define BMZ_ALIGN(_mem_, _n_) \
  ((Byte *)(_mem_) + (_n_) - (((size_t)(_mem_)) % (_n_)))
00079
/*
 * Big-endian integer readers.  Each macro reads 2, 4 or 6 bytes through the
 * byte pointer _p_, stores the value in the lvalue _n_ and leaves _p_ just
 * past the bytes consumed.
 *
 * Every byte is widened to an unsigned type before it is shifted: shifting a
 * promoted (signed) int left by 24 overflows for bytes >= 0x80, and the
 * resulting negative value sign-extends into the upper bits of a 64-bit
 * destination.
 */
#define BMZ_READ_INT16(_p_, _n_) do { \
  (_n_) = ((uint32_t)*(_p_)++ << 8); \
  (_n_) |= (uint32_t)*(_p_)++; \
} while (0)

#define BMZ_READ_INT32(_p_, _n_) do { \
  (_n_) = ((uint32_t)*(_p_)++ << 24); \
  (_n_) |= ((uint32_t)*(_p_)++ << 16); \
  (_n_) |= ((uint32_t)*(_p_)++ << 8); \
  (_n_) |= (uint32_t)*(_p_)++; \
} while (0)

#define BMZ_READ_INT48(_p_, _n_) do { \
  (_n_) = ((uint64_t)*(_p_)++ << 40); \
  (_n_) |= ((uint64_t)*(_p_)++ << 32); \
  (_n_) |= ((uint64_t)*(_p_)++ << 24); \
  (_n_) |= ((uint64_t)*(_p_)++ << 16); \
  (_n_) |= ((uint64_t)*(_p_)++ << 8); \
  (_n_) |= (uint64_t)*(_p_)++; \
} while (0)
00097
/*
 * Big-endian integer writers.  Each macro stores the low 2, 4 or 6 bytes of
 * _n_ through the byte pointer _p_, most significant byte first, and leaves
 * _p_ just past the bytes written.
 *
 * Each macro is a single statement (safe in unbraced if/else) and its
 * arguments are parenthesized.  The 48-bit writer widens _n_ to 64 bits first
 * so that shifting by 32 or 40 is defined even for a 32-bit argument.
 */
#define BMZ_WRITE_INT16(_p_, _n_) do { \
  *(_p_)++ = (Byte)((_n_) >> 8); \
  *(_p_)++ = (Byte)(_n_); \
} while (0)

#define BMZ_WRITE_INT32(_p_, _n_) do { \
  *(_p_)++ = (Byte)((_n_) >> 24); \
  *(_p_)++ = (Byte)((_n_) >> 16); \
  *(_p_)++ = (Byte)((_n_) >> 8); \
  *(_p_)++ = (Byte)(_n_); \
} while (0)

#define BMZ_WRITE_INT48(_p_, _n_) do { \
  *(_p_)++ = (Byte)((uint64_t)(_n_) >> 40); \
  *(_p_)++ = (Byte)((uint64_t)(_n_) >> 32); \
  *(_p_)++ = (Byte)((uint64_t)(_n_) >> 24); \
  *(_p_)++ = (Byte)((uint64_t)(_n_) >> 16); \
  *(_p_)++ = (Byte)((uint64_t)(_n_) >> 8); \
  *(_p_)++ = (Byte)(_n_); \
} while (0)
00115
00116 static void
00117 read_bmz_header(int fd, Byte *buf) {
00118 if (read(fd, buf, BMZ_HEADER_SZ) != BMZ_HEADER_SZ)
00119 DIE("error reading bmz file header (%lu bytes)", (Lu)BMZ_HEADER_SZ);
00120 }
00121
00122 static void
00123 parse_bmz_header(const Byte *buf, uint16_t *version_p, uint64_t *orig_size_p,
00124 uint32_t *checksum_p, uint32_t *options) {
00125 const Byte *bp = buf;
00126 size_t magic_len = strlen(BMZ_MAGIC);
00127
00128 if (memcmp(buf, BMZ_MAGIC, magic_len)) {
00129 DIE("bad magic in file header (%lu bytes)", (Lu)magic_len);
00130 }
00131 bp += magic_len;
00132 BMZ_READ_INT16(bp, *version_p);
00133
00134 if (*version_p > BMZIP_VER)
00135 DIE("incomaptible version: %04x", *version_p);
00136
00137 *options = *bp++;
00138 BMZ_READ_INT48(bp, *orig_size_p);
00139 BMZ_READ_INT32(bp, *checksum_p);
00140 }
00141
00142 static void
00143 write_bmz_header(int fd, size_t in_len, uint32_t checksum, Byte options) {
00144 char buf[BMZ_HEADER_SZ], *bp = buf;
00145 uint64_t orig_size = in_len;
00146
00147 strcpy(buf, BMZ_MAGIC);
00148 bp += strlen(BMZ_MAGIC);
00149 BMZ_WRITE_INT16(bp, BMZIP_VER);
00150 *bp++ = options;
00151 BMZ_WRITE_INT48(bp, orig_size);
00152 BMZ_WRITE_INT32(bp, checksum);
00153
00154 if (write(fd, buf, BMZ_HEADER_SZ) != BMZ_HEADER_SZ)
00155 DIE("error writing header (%lu bytes)", (Lu)BMZ_HEADER_SZ);
00156 }
00157
00158 static void
00159 do_list(int fd) {
00160 Byte buf[BMZ_HEADER_SZ];
00161 uint16_t version;
00162 uint64_t orig_size, size;
00163 uint32_t checksum, options;
00164 struct stat st;
00165
00166 if (fstat(fd, &st) != 0) DIE("error getting stat from file (%d)", fd);
00167
00168 size = st.st_size;
00169 read_bmz_header(fd, buf);
00170 parse_bmz_header(buf, &version, &orig_size, &checksum, &options);
00171 printf("%8s%16s%16s%8s\n", "version", "compressed", "uncompressed", "ratio");
00172 printf(" %04x%16llu%16llu%7.2f%%\n", version, (Llu)size,
00173 (Llu)orig_size, orig_size ? size * 100. / orig_size : 1);
00174 }
00175
00176 static void
00177 do_pack(const void *in, size_t in_len, size_t buf_len,
00178 size_t offset, size_t fp_len, Byte options) {
00179 size_t buflen = bmz_pack_buflen(in_len), out_len = buflen;
00180 size_t worklen = bmz_pack_worklen(in_len, fp_len);
00181 int ret, bm_only = (options & BMZ_O_BM_ONLY) || s_bm_dump;
00182 Byte *out, *work_mem;
00183
00184 if (bm_only) {
00185 out_len = in_len + 1;
00186
00187 if (buf_len > in_len + worklen) {
00188 out = (Byte *)in + in_len;
00189 work_mem = out + out_len;
00190 }
00191 else {
00192 out = malloc(worklen);
00193
00194 if (!out)
00195 DIE("error allocating %lu bytes memory", (Lu)worklen);
00196
00197 work_mem = out + out_len;
00198 }
00199
00200 work_mem = BMZ_ALIGN(work_mem, 8);
00201 }
00202 else if (buf_len > buflen + worklen) {
00203 work_mem = (Byte *)in + buflen;
00204 out = (Byte *)in;
00205 }
00206 else {
00207 out = malloc(buflen + worklen);
00208
00209 if (!out)
00210 DIE("error allocating %lu bytes memory", (Lu)buflen + worklen);
00211
00212 work_mem = out + buflen;
00213 }
00214
00215 if (bm_only) {
00216 ret = bmz_bm_pack_mask(in, in_len, out, &out_len, offset, fp_len,
00217 work_mem, 257);
00218 if (ret != BMZ_E_OK)
00219 DIE("error encoding bm output (error %d)", ret);
00220
00221 if (s_bm_dump) {
00222 if ((ret = bmz_bm_dump(out, out_len)) != BMZ_E_OK)
00223 WARN("error dumping bm encoding (ret=%d)", ret);
00224
00225 return;
00226 }
00227 }
00228 else if ((ret = bmz_pack(in, in_len, out, &out_len, offset, fp_len,
00229 (s_bm_hash << 24), work_mem))
00230 != BMZ_E_OK) {
00231 DIE("error compressing input (error %d)", ret);
00232 }
00233 write_bmz_header(1, in_len, bmz_checksum(out, out_len), options);
00234 write(1, out, out_len);
00235 }
00236
00237 static void
00238 do_unpack(const void *in, size_t in_len, size_t buf_len) {
00239 const Byte *bp = (Byte *)in;
00240 uint16_t version;
00241 uint64_t orig_size;
00242 uint32_t checksum, cs, options;
00243 size_t outlen, worklen, len = in_len - BMZ_HEADER_SZ;
00244 Byte *out, *workmem;
00245 int ret;
00246
00247 if (in_len < BMZ_HEADER_SZ) DIE("file truncated (size: %lu)", (Lu)in_len);
00248
00249 parse_bmz_header(bp, &version, &orig_size, &checksum, &options);
00250
00251 if (orig_size > INT_MAX && sizeof(size_t) == 4)
00252 DIE("original file size %llu requires 64-bit version of bmzip",
00253 (Llu)orig_size);
00254
00255 bp += BMZ_HEADER_SZ;
00256 buf_len -= BMZ_HEADER_SZ;
00257 cs = bmz_checksum(bp, len);
00258 outlen = orig_size;
00259
00260 if (cs != checksum)
00261 DIE("checksum mismatch (expecting %x, got %x).", checksum, cs);
00262
00263 if (options & BMZ_O_BM_ONLY) {
00264 out = buf_len > in_len + orig_size ? (Byte*)bp + len : malloc(outlen);
00265
00266 if ((ret = bmz_bm_unpack(bp, len, out, &outlen)) != BMZ_E_OK)
00267 DIE("error decoding bm input (error %d)", ret);
00268 }
00269 else {
00270 worklen = bmz_unpack_worklen(orig_size > len ? orig_size : len);
00271 out = (buf_len > outlen + worklen) ? (Byte *)bp : malloc(outlen + worklen);
00272 workmem = out + outlen;
00273
00274 if ((ret = bmz_unpack(bp, len, out, &outlen, workmem)) != BMZ_E_OK)
00275 DIE("error decompressing (error %d)", ret);
00276 }
00277 if (orig_size != outlen)
00278 WARN("size mismatch (expecting %llu, got %llu)",
00279 (Llu)orig_size, (Llu)outlen);
00280
00281 write(1, out, outlen);
00282 }
00283
00284 static void
00285 do_block(const void *in, size_t len, size_t buf_len, size_t offset,
00286 size_t fp_len, int action, int options) {
00287 switch (action) {
00288 case BMZ_A_PACK:
00289 do_pack(in, len, buf_len, offset, fp_len, options);
00290 break;
00291 case BMZ_A_UNPACK:
00292 do_unpack(in, len, buf_len);
00293 break;
00294 default:
00295 DIE("unknown action: %d", action);
00296 }
00297 }
00298
00299 static char *
00300 read_from_fp(FILE *fp, size_t *len_p, size_t *size_p) {
00301 char *data = NULL;
00302 char buf[65536];
00303 int64_t len = 0, size = 0, ret;
00304
00305 while ((ret = fread(buf, 1, sizeof(buf), fp)) > 0) {
00306 len += ret;
00307 if (len > INT_MAX)
00308 DIE("reading from stdin for data size greater than 2GB "
00309 "not yet supported (current size: %lld)", (long long)len);
00310
00311 if (len > size) {
00312 size = (len + 16) * 5 / 2;
00313 data = realloc(data, size);
00314 }
00315 memcpy(data + len - ret, buf, ret);
00316 }
00317 *len_p = len;
00318 *size_p = size;
00319 return data;
00320 }
00321
00322 static char *
00323 read_from_fd(int fd, size_t *len_p, size_t *size_p) {
00324 struct stat st;
00325 void *data = NULL;
00326 size_t sz;
00327
00328 if (fstat(fd, &st) != 0) DIE("cannot stat fd <%d>", fd);
00329
00330 if (st.st_size > INT_MAX && sizeof(size_t) == 4)
00331 DIE("file size %llu requires 64-bit version of bmzip",
00332 (Llu)st.st_size);
00333
00334 sz = *len_p = *size_p = st.st_size;
00335
00336 if (!sz) return data;
00337
00338 if (!s_no_mmap) {
00339 #ifndef HT_NO_MMAP
00340 LOG(1, "mmapping file (size: %lu)...", (Lu)sz);
00341 data = mmap(NULL, sz, PROT_READ, MAP_PRIVATE, fd, 0);
00342
00343 if (!data || (void *)-1 == data) {
00344 LOG(1, "mmap failed on fd %d", fd);
00345 errno = 0;
00346 LOG(1, "%s", "trying alternative");
00347 data = NULL;
00348 }
00349 #endif
00350 }
00351 if (!data) {
00352 LOG(1, "reading file (size: %lu) into memory...", (Lu)sz);
00353 data = malloc(sz);
00354
00355 if (!data) DIE("cannot allocate %lu bytes memory", (Lu)sz);
00356
00357 if (read(fd, data, sz) != sz) DIE("error reading %lu bytes", (Lu)sz);
00358 }
00359
00360 return data;
00361 }
00362
00363 static void
00364 input_from_stdin(size_t offset, size_t fp_len, int action, int options) {
00365 size_t len, buf_len;
00366
00367 if (action == BMZ_A_LIST) {
00368 do_list(0);
00369 }
00370 else {
00371 void *data = read_from_fp(stdin, &len, &buf_len);
00372 do_block(data, len, buf_len, offset, fp_len, action, options);
00373 }
00374 }
00375
00376 static void
00377 input_from_file(const char *fname, size_t offset, size_t fp_len, int action,
00378 int options) {
00379 size_t len, buf_len;
00380 int fd = open(fname, O_RDONLY, 0);
00381
00382 if (fd == -1) DIE("cannot open '%s'", fname);
00383
00384 if (action == BMZ_A_LIST) {
00385 do_list(fd);
00386 }
00387 else {
00388 void *data = read_from_fd(fd, &len, &buf_len);
00389 do_block(data, len, buf_len, offset, fp_len, action, options);
00390 }
00391
00392 }
00393
00394 static int
00395 bm_hash(const char *name) {
00396
00397 if (!strcmp("mod", name)) return BMZ_HASH_MOD;
00398 else if (!strcmp("mod16x2", name)) return BMZ_HASH_MOD16X2;
00399 else if (!strcmp("mask16x2", name)) return BMZ_HASH_MASK16X2;
00400 else if (!strcmp("mask", name)) return BMZ_HASH_MASK;
00401 else if (!strcmp("mask32x2", name)) return BMZ_HASH_MASK32X2;
00402
00403 DIE("unknown hash: %s", name);
00404 return 0;
00405 }
00406
00407 static void HT_NORETURN
00408 show_usage() {
00409 fprintf(stderr, "%s%s",
00410 "usage: bmzip [options] [<file>]\n"
00411 "-d, --decompress decompress to stdout\n"
00412 "--verbose[=level] show some diagnostic messages\n"
00413 "-l, --list list compressed file info\n"
00414 "-h, --help show this message\n"
00415 "--offset <number> expert: bm encoding start offset\n"
00416 "--fp-len <number> expert: bm encoding fingerprint size\n"
00417 "--bm-thresh <number> expert: bm hash collision threshold\n",
00418 "--bm-hash <name> expert: use <name> as bm hash\n"
00419 "--bm-only expert: skip lz compression\n"
00420 "--bm-dump expert: dump human readable bm encoding\n"
00421 "--no-mmap expert: do not use mmap\n");
00422 exit(0);
00423 }
00424
00425 int
00426 main(int ac, char *av[]) {
00427 char **ia = av + 1, **a_end = av + ac;
00428
00429 size_t fp_len = 64, offset = 0;
00430 int bm_thresh = 0, action = BMZ_A_PACK, options = 0;
00431
00432 for (; ia < a_end; ++ia) {
00433 if (!strcmp("-d", *ia) ||
00434 !strcmp("--decompress", *ia)) action = BMZ_A_UNPACK;
00435 else if (!strcmp("--verbose", *ia)) s_verbosity = 1;
00436 else if (!strcmp("--verbose=", *ia)) s_verbosity = atoi(*ia + 9);
00437 else if (!strcmp("--offset", *ia)) offset = atoi(*++ia);
00438 else if (!strcmp("--fp-len", *ia)) fp_len = atoi(*++ia);
00439 else if (!strcmp("--bm-only", *ia)) options |= BMZ_O_BM_ONLY;
00440 else if (!strcmp("--bm-dump", *ia)) s_bm_dump = 1;
00441 else if (!strcmp("--no-mmap", *ia)) s_no_mmap = 1;
00442 else if (!strcmp("--bm-thresh", *ia)) bm_thresh = atoi(*++ia);
00443 else if (!strcmp("--bm-hash", *ia)) s_bm_hash = bm_hash(*++ia);
00444 else if (!strcmp("-l", *ia) ||
00445 !strcmp("--list", *ia)) action = BMZ_A_LIST;
00446 else if (!strcmp("-h", *ia) ||
00447 !strcmp("--help", *ia)) {
00448 show_usage();
00449 }
00450 else if (!strcmp("--version", *ia)) {
00451 LOG(0, "version %d.%d.%d.%d", BMZIP_VER >> 12, (BMZIP_VER >> 8) & 0xf,
00452 (BMZIP_VER >> 4) & 0xf, BMZIP_VER & 0xf);
00453 exit(0);
00454 }
00455 else if (!strcmp("--", *ia)) {
00456 ++ia;
00457 break;
00458 }
00459 else if ('-' == **ia)
00460 DIE("unknown option: %s\n", *ia);
00461 else break;
00462 }
00463 if (s_verbosity)
00464 bmz_set_verbosity(s_verbosity);
00465
00466 if (bm_thresh)
00467 bmz_set_collision_thresh(bm_thresh);
00468
00469 if (ia >= a_end)
00470 input_from_stdin(offset, fp_len, action, options);
00471 else
00472 input_from_file(*ia, offset, fp_len, action, options);
00473
00474 return 0;
00475 }