diff --git a/src/external/sdefl.h b/src/external/sdefl.h index 5db76763a..56539a56e 100644 --- a/src/external/sdefl.h +++ b/src/external/sdefl.h @@ -38,10 +38,10 @@ this file implementation in *one* C or C++ file to prevent collisions. | zlib 1.2.11 -1 | 72 MB/s | 307 MB/s | 42298774 | 42.30 | | zlib 1.2.11 -6 | 24 MB/s | 313 MB/s | 36548921 | 36.55 | | zlib 1.2.11 -9 | 20 MB/s | 314 MB/s | 36475792 | 36.48 | -| sdefl 1.0 -0 | 127 MB/s | 371 MB/s | 40004116 | 39.88 | -| sdefl 1.0 -1 | 111 MB/s | 398 MB/s | 38940674 | 38.82 | -| sdefl 1.0 -5 | 45 MB/s | 420 MB/s | 36577183 | 36.46 | -| sdefl 1.0 -7 | 38 MB/s | 423 MB/s | 36523781 | 36.41 | +| sdefl 1.0 -0 | 127 MB/s | 355 MB/s | 40004116 | 39.88 | +| sdefl 1.0 -1 | 111 MB/s | 413 MB/s | 38940674 | 38.82 | +| sdefl 1.0 -5 | 45 MB/s | 436 MB/s | 36577183 | 36.46 | +| sdefl 1.0 -7 | 38 MB/s | 432 MB/s | 36523781 | 36.41 | | libdeflate 1.3 -1 | 147 MB/s | 667 MB/s | 39597378 | 39.60 | | libdeflate 1.3 -6 | 69 MB/s | 689 MB/s | 36648318 | 36.65 | | libdeflate 1.3 -9 | 13 MB/s | 672 MB/s | 35197141 | 35.20 | @@ -50,20 +50,20 @@ this file implementation in *one* C or C++ file to prevent collisions. ### Compression Results on the [Silesia compression corpus](http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia): -| File | Original | `sdefl 0` | `sdefl 5` | `sdefl 7` | -| :------ | ---------: | -----------------: | ---------: | ----------: | -| dickens | 10.192.446 | 4,260,187| 3,845,261| 3,833,657 | -| mozilla | 51.220.480 | 20,774,706 | 19,607,009 | 19,565,867 | -| mr | 9.970.564 | 3,860,531 | 3,673,460 | 3,665,627 | -| nci | 33.553.445 | 4,030,283 | 3,094,526 | 3,006,075 | -| ooffice | 6.152.192 | 3,320,063 | 3,186,373 | 3,183,815 | -| osdb | 10.085.684 | 3,919,646 | 3,649,510 | 3,649,477 | -| reymont | 6.627.202 | 2,263,378 | 1,857,588 | 1,827,237 | -| samba | 21.606.400 | 6,121,797 | 5,462,670 | 5,450,762 | -| sao | 7.251.944 | 5,612,421 | 5,485,380 | 5,481,765 | -| webster | 41.458.703 | 13,972,648 | 12,059,432 | 11,991,421 | -| xml | 5.345.280 | 886,620| 674,009 | 662,141 | -| x-ray | 8.474.240 | 6,304,655 | 6,244,779 | 6,244,779 | +| File | Original | `sdefl 0` | `sdefl 5` | `sdefl 7` | +| --------| -----------| -------------| ---------- | ------------| +| dickens | 10.192.446 | 4,260,187 | 3,845,261 | 3,833,657 | +| mozilla | 51.220.480 | 20,774,706 | 19,607,009 | 19,565,867 | +| mr | 9.970.564 | 3,860,531 | 3,673,460 | 3,665,627 | +| nci | 33.553.445 | 4,030,283 | 3,094,526 | 3,006,075 | +| ooffice | 6.152.192 | 3,320,063 | 3,186,373 | 3,183,815 | +| osdb | 10.085.684 | 3,919,646 | 3,649,510 | 3,649,477 | +| reymont | 6.627.202 | 2,263,378 | 1,857,588 | 1,827,237 | +| samba | 21.606.400 | 6,121,797 | 5,462,670 | 5,450,762 | +| sao | 7.251.944 | 5,612,421 | 5,485,380 | 5,481,765 | +| webster | 41.458.703 | 13,972,648 | 12,059,432 | 11,991,421 | +| xml | 5.345.280 | 886,620 | 674,009 | 662,141 | +| x-ray | 8.474.240 | 6,304,655 | 6,244,779 | 6,244,779 | ## License ``` @@ -462,8 +462,12 @@ sdefl_match_codes(struct sdefl_match_codes *cod, int dist, int len) { 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28 }; + assert(len <= 258); + assert(dist <= 32768); cod->ls = lslot[len]; cod->lc = 257 + cod->ls; + assert(cod->lc <= 285); + cod->dx = sdefl_ilog2(sdefl_npow2(dist) >> 2); cod->dc = cod->dx ? ((cod->dx + 1) << 1) + (dist > dxmax[cod->dx]) : dist-1; } @@ -501,7 +505,9 @@ sdefl_flush(unsigned char **dst, struct sdefl *s, int is_last, sdefl_precode(&symcnt, freqs, items, s->cod.len.lit, s->cod.len.off); sdefl_huff(lens, codes, freqs, SDEFL_PRE_MAX, SDEFL_PRE_CODES); for (item_cnt = SDEFL_PRE_MAX; item_cnt > 4; item_cnt--) { - if (lens[perm[item_cnt - 1]]) break; + if (lens[perm[item_cnt - 1]]){ + break; + } } /* block header */ sdefl_put(dst, s, is_last ? 0x01 : 0x00, 1); /* block */ @@ -509,8 +515,9 @@ sdefl_flush(unsigned char **dst, struct sdefl *s, int is_last, sdefl_put(dst, s, symcnt.lit - 257, 5); sdefl_put(dst, s, symcnt.off - 1, 5); sdefl_put(dst, s, item_cnt - 4, 4); - for (i = 0; i < item_cnt; ++i) + for (i = 0; i < item_cnt; ++i) { sdefl_put(dst, s, lens[perm[i]], 3); + } for (i = 0; i < symcnt.items; ++i) { unsigned sym = items[i] & 0x1F; sdefl_put(dst, s, (int)codes[sym], lens[sym]); @@ -521,12 +528,14 @@ sdefl_flush(unsigned char **dst, struct sdefl *s, int is_last, } /* block sequences */ for (i = 0; i < s->seq_cnt; ++i) { - if (s->seq[i].off >= 0) + if (s->seq[i].off >= 0) { for (j = 0; j < s->seq[i].len; ++j) { int c = in[s->seq[i].off + j]; sdefl_put(dst, s, (int)s->cod.word.lit[c], s->cod.len.lit[c]); } - else sdefl_match(dst, s, -s->seq[i].off, s->seq[i].len); + } else { + sdefl_match(dst, s, -s->seq[i].off, s->seq[i].len); + } } sdefl_put(dst, s, (int)(s)->cod.word.lit[SDEFL_EOB], (s)->cod.len.lit[SDEFL_EOB]); memset(&s->freq, 0, sizeof(s->freq)); @@ -579,12 +588,13 @@ sdefl_compr(struct sdefl *s, unsigned char *out, const unsigned char *in, for (n = 0; n < SDEFL_HASH_SIZ; ++n) { s->tbl[n] = SDEFL_NIL; } - do {int blk_end = i + SDEFL_BLK_MAX < in_len ? i + SDEFL_BLK_MAX : in_len; + do {int blk_end = ((i + SDEFL_BLK_MAX) < in_len) ? (i + SDEFL_BLK_MAX) : in_len; while (i < blk_end) { struct sdefl_match m = {0}; - int max_match = ((in_len-i)>SDEFL_MAX_MATCH) ? SDEFL_MAX_MATCH:(in_len-i); + int left = blk_end - i; + int max_match = (left >= SDEFL_MAX_MATCH) ? SDEFL_MAX_MATCH : left; int nice_match = pref[lvl] < max_match ? pref[lvl] : max_match; - int run = 1, inc = 1, run_inc; + int run = 1, inc = 1, run_inc = 0; if (max_match > SDEFL_MIN_MATCH) { sdefl_fnd(&m, s, max_chain, max_match, in, i); } @@ -615,9 +625,11 @@ sdefl_compr(struct sdefl *s, unsigned char *out, const unsigned char *in, unsigned h = sdefl_hash32(&in[i]); s->prv[i&SDEFL_WIN_MSK] = s->tbl[h]; s->tbl[h] = i, i += inc; + assert(i <= blk_end); } } else { i += run_inc; + assert(i <= blk_end); } } if (litlen) { @@ -627,8 +639,9 @@ sdefl_compr(struct sdefl *s, unsigned char *out, const unsigned char *in, sdefl_flush(&q, s, blk_end == in_len, in); } while (i < in_len); - if (s->bitcnt) + if (s->bitcnt > 0) sdefl_put(&q, s, 0x00, 8 - s->bitcnt); + return (int)(q - out); } extern int diff --git a/src/external/sinfl.h b/src/external/sinfl.h index 09f50d2bc..915da9d23 100644 --- a/src/external/sinfl.h +++ b/src/external/sinfl.h @@ -10,7 +10,7 @@ as needed to keep the implementation as concise as possible. - Dual license with either MIT or public domain - Small implementation - Deflate: 525 LoC - - Inflate: 320 LoC + - Inflate: 500 LoC - Webassembly: - Deflate ~3.7 KB (~2.2KB compressed) - Inflate ~3.6 KB (~2.2KB compressed) @@ -39,10 +39,10 @@ this file implementation in *one* C or C++ file to prevent collisions. | zlib 1.2.11 -1 | 72 MB/s | 307 MB/s | 42298774 | 42.30 | | zlib 1.2.11 -6 | 24 MB/s | 313 MB/s | 36548921 | 36.55 | | zlib 1.2.11 -9 | 20 MB/s | 314 MB/s | 36475792 | 36.48 | -| sdefl 1.0 -0 | 127 MB/s | 371 MB/s | 40004116 | 39.88 | -| sdefl 1.0 -1 | 111 MB/s | 398 MB/s | 38940674 | 38.82 | -| sdefl 1.0 -5 | 45 MB/s | 420 MB/s | 36577183 | 36.46 | -| sdefl 1.0 -7 | 38 MB/s | 423 MB/s | 36523781 | 36.41 | +| sdefl 1.0 -0 | 127 MB/s | 355 MB/s | 40004116 | 39.88 | +| sdefl 1.0 -1 | 111 MB/s | 413 MB/s | 38940674 | 38.82 | +| sdefl 1.0 -5 | 45 MB/s | 436 MB/s | 36577183 | 36.46 | +| sdefl 1.0 -7 | 38 MB/s | 432 MB/s | 36523781 | 36.41 | | libdeflate 1.3 -1 | 147 MB/s | 667 MB/s | 39597378 | 39.60 | | libdeflate 1.3 -6 | 69 MB/s | 689 MB/s | 36648318 | 36.65 | | libdeflate 1.3 -9 | 13 MB/s | 672 MB/s | 35197141 | 35.20 | @@ -51,20 +51,20 @@ this file implementation in *one* C or C++ file to prevent collisions. ### Compression Results on the [Silesia compression corpus](http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia): -| File | Original | `sdefl 0` | `sdefl 5` | `sdefl 7` | -| :------ | ---------: | -----------------: | ---------: | ----------: | -| dickens | 10.192.446 | 4,260,187| 3,845,261| 3,833,657 | -| mozilla | 51.220.480 | 20,774,706 | 19,607,009 | 19,565,867 | -| mr | 9.970.564 | 3,860,531 | 3,673,460 | 3,665,627 | -| nci | 33.553.445 | 4,030,283 | 3,094,526 | 3,006,075 | -| ooffice | 6.152.192 | 3,320,063 | 3,186,373 | 3,183,815 | -| osdb | 10.085.684 | 3,919,646 | 3,649,510 | 3,649,477 | -| reymont | 6.627.202 | 2,263,378 | 1,857,588 | 1,827,237 | -| samba | 21.606.400 | 6,121,797 | 5,462,670 | 5,450,762 | -| sao | 7.251.944 | 5,612,421 | 5,485,380 | 5,481,765 | -| webster | 41.458.703 | 13,972,648 | 12,059,432 | 11,991,421 | -| xml | 5.345.280 | 886,620| 674,009 | 662,141 | -| x-ray | 8.474.240 | 6,304,655 | 6,244,779 | 6,244,779 | +| File | Original | `sdefl 0` | `sdefl 5` | `sdefl 7` | +| --------| -----------| -------------| ---------- | ------------| +| dickens | 10.192.446 | 4,260,187 | 3,845,261 | 3,833,657 | +| mozilla | 51.220.480 | 20,774,706 | 19,607,009 | 19,565,867 | +| mr | 9.970.564 | 3,860,531 | 3,673,460 | 3,665,627 | +| nci | 33.553.445 | 4,030,283 | 3,094,526 | 3,006,075 | +| ooffice | 6.152.192 | 3,320,063 | 3,186,373 | 3,183,815 | +| osdb | 10.085.684 | 3,919,646 | 3,649,510 | 3,649,477 | +| reymont | 6.627.202 | 2,263,378 | 1,857,588 | 1,827,237 | +| samba | 21.606.400 | 6,121,797 | 5,462,670 | 5,450,762 | +| sao | 7.251.944 | 5,612,421 | 5,485,380 | 5,481,765 | +| webster | 41.458.703 | 13,972,648 | 12,059,432 | 11,991,421 | +| xml | 5.345.280 | 886,620 | 674,009 | 662,141 | +| x-ray | 8.474.240 | 6,304,655 | 6,244,779 | 6,244,779 | ## License ``` @@ -151,7 +151,7 @@ extern int zsinflate(void *out, int cap, const void *in, int size); #endif #ifndef SINFL_NO_SIMD -#if __x86_64__ || defined(_WIN32) || defined(_WIN64) +#if defined(__x86_64__) || defined(_WIN32) || defined(_WIN64) #include #define sinfl_char16 __m128i #define sinfl_char16_ld(p) _mm_loadu_si128((const __m128i *)(void*)(p)) @@ -183,6 +183,18 @@ sinfl_read64(const void *p) { memcpy(&n, p, 8); return n; } +static void +sinfl_copy64(unsigned char **dst, unsigned char **src) { + unsigned long long n; + memcpy(&n, *src, 8); + memcpy(*dst, &n, 8); + *dst += 8, *src += 8; +} +static unsigned char* +sinfl_write64(unsigned char *dst, unsigned long long w) { + memcpy(dst, &w, 8); + return dst + 8; +} #ifndef SINFL_NO_SIMD static unsigned char* sinfl_write128(unsigned char *dst, sinfl_char16 w) { @@ -195,25 +207,12 @@ sinfl_copy128(unsigned char **dst, unsigned char **src) { sinfl_char16_str(*dst, n); *dst += 16, *src += 16; } -#else -static unsigned char* -sinfl_write64(unsigned char *dst, unsigned long long w) { - memcpy(dst, &w, 8); - return dst + 8; -} -static void -sinfl_copy64(unsigned char **dst, unsigned char **src) { - unsigned long long n; - memcpy(&n, *src, 8); - memcpy(*dst, &n, 8); - *dst += 8, *src += 8; -} #endif static void sinfl_refill(struct sinfl *s) { s->bitbuf |= sinfl_read64(s->bitptr) << s->bitcnt; s->bitptr += (63 - s->bitcnt) >> 3; - s->bitcnt |= 56; /* bitcount is in range [56,63] */ + s->bitcnt |= 56; /* bitcount in range [56,63] */ } static int sinfl_peek(struct sinfl *s, int cnt) { @@ -222,7 +221,7 @@ sinfl_peek(struct sinfl *s, int cnt) { return s->bitbuf & ((1ull << cnt) - 1); } static void -sinfl_consume(struct sinfl *s, int cnt) { +sinfl_eat(struct sinfl *s, int cnt) { assert(cnt <= s->bitcnt); s->bitbuf >>= cnt; s->bitcnt -= cnt; @@ -230,7 +229,7 @@ sinfl_consume(struct sinfl *s, int cnt) { static int sinfl__get(struct sinfl *s, int cnt) { int res = sinfl_peek(s, cnt); - sinfl_consume(s, cnt); + sinfl_eat(s, cnt); return res; } static int @@ -285,7 +284,7 @@ sinfl_build_subtbl(struct sinfl_gen *gen, unsigned *tbl, int tbl_bits, while (1) { unsigned entry; int bit, stride, i; - /* start new subtable */ + /* start new sub-table */ if ((gen->word & ((1 << tbl_bits)-1)) != sub_prefix) { int used = 0; sub_prefix = gen->word & ((1 << tbl_bits)-1); @@ -299,7 +298,7 @@ sinfl_build_subtbl(struct sinfl_gen *gen, unsigned *tbl, int tbl_bits, tbl_end = sub_start + (1 << sub_bits); tbl[sub_prefix] = (sub_start << 16) | 0x10 | (sub_bits & 0xf); } - /* fill subtable */ + /* fill sub-table */ entry = (*gen->sorted << 16) | ((gen->len - tbl_bits) & 0xf); gen->sorted++; i = sub_start + (gen->word >> tbl_bits); @@ -353,18 +352,17 @@ sinfl_build(unsigned *tbl, unsigned char *lens, int tbl_bits, int maxlen, } static int sinfl_decode(struct sinfl *s, const unsigned *tbl, int bit_len) { - sinfl_refill(s); - {int idx = sinfl_peek(s, bit_len); + int idx = sinfl_peek(s, bit_len); unsigned key = tbl[idx]; if (key & 0x10) { /* sub-table lookup */ int len = key & 0x0f; - sinfl_consume(s, bit_len); + sinfl_eat(s, bit_len); idx = sinfl_peek(s, len); key = tbl[((key >> 16) & 0xffff) + (unsigned)idx]; } - sinfl_consume(s, key & 0x0f); - return (key >> 16) & 0x0fff;} + sinfl_eat(s, key & 0x0f); + return (key >> 16) & 0x0fff; } static int sinfl_decompress(unsigned char *out, int cap, const unsigned char *in, int size) { @@ -402,11 +400,11 @@ sinfl_decompress(unsigned char *out, int cap, const unsigned char *in, int size) } break; case stored: { /* uncompressed block */ - int len; + int len, nlen; sinfl_refill(&s); sinfl__get(&s,s.bitcnt & 7); len = sinfl__get(&s,16); - //int nlen = sinfl__get(&s,16); // @raysan5: Unused variable? + nlen = sinfl__get(&s,16); in -= 2; s.bitcnt = 0; if (len > (e-in) || !len) @@ -430,40 +428,62 @@ sinfl_decompress(unsigned char *out, int cap, const unsigned char *in, int size) state = blk; } break; case dyn: { - /* dynamic huffman codes */ - int n, i; - unsigned hlens[SINFL_PRE_TBL_SIZE]; - unsigned char nlens[19] = {0}, lens[288+32]; + /* dynamic huffman codes */ + int n, i; + unsigned hlens[SINFL_PRE_TBL_SIZE]; + unsigned char nlens[19] = {0}, lens[288+32]; + sinfl_refill(&s); + {int nlit = 257 + sinfl__get(&s,5); + int ndist = 1 + sinfl__get(&s,5); + int nlen = 4 + sinfl__get(&s,4); + for (n = 0; n < nlen; n++) + nlens[order[n]] = (unsigned char)sinfl_get(&s,3); + sinfl_build(hlens, nlens, 7, 7, 19); + + /* decode code lengths */ + for (n = 0; n < nlit + ndist;) { sinfl_refill(&s); - {int nlit = 257 + sinfl__get(&s,5); - int ndist = 1 + sinfl__get(&s,5); - int nlen = 4 + sinfl__get(&s,4); - for (n = 0; n < nlen; n++) - nlens[order[n]] = (unsigned char)sinfl_get(&s,3); - sinfl_build(hlens, nlens, 7, 7, 19); - - /* decode code lengths */ - for (n = 0; n < nlit + ndist;) { - int sym = sinfl_decode(&s, hlens, 7); - switch (sym) {default: lens[n++] = (unsigned char)sym; break; - case 16: for (i=3+sinfl_get(&s,2);i;i--,n++) lens[n]=lens[n-1]; break; - case 17: for (i=3+sinfl_get(&s,3);i;i--,n++) lens[n]=0; break; - case 18: for (i=11+sinfl_get(&s,7);i;i--,n++) lens[n]=0; break;} - } - /* build lit/dist tables */ - sinfl_build(s.lits, lens, 10, 15, nlit); - sinfl_build(s.dsts, lens + nlit, 8, 15, ndist); - state = blk;} + int sym = sinfl_decode(&s, hlens, 7); + switch (sym) {default: lens[n++] = (unsigned char)sym; break; + case 16: for (i=3+sinfl_get(&s,2);i;i--,n++) lens[n]=lens[n-1]; break; + case 17: for (i=3+sinfl_get(&s,3);i;i--,n++) lens[n]=0; break; + case 18: for (i=11+sinfl_get(&s,7);i;i--,n++) lens[n]=0; break;} + } + /* build lit/dist tables */ + sinfl_build(s.lits, lens, 10, 15, nlit); + sinfl_build(s.dsts, lens + nlit, 8, 15, ndist); + state = blk;} } break; case blk: { /* decompress block */ - int sym = sinfl_decode(&s, s.lits, 10); - if (sym < 256) { - /* literal */ - *out++ = (unsigned char)sym; - } else if (sym > 256) {sym -= 257; /* match symbol */ + while (1) { sinfl_refill(&s); + int sym = sinfl_decode(&s, s.lits, 10); + if (sym < 256) { + /* literal */ + if (sinfl_unlikely(out >= oe)) { + return (int)(out-o); + } + *out++ = (unsigned char)sym; + sym = sinfl_decode(&s, s.lits, 10); + if (sym < 256) { + *out++ = (unsigned char)sym; + continue; + } + } + if (sinfl_unlikely(sym == 256)) { + /* end of block */ + if (last) return (int)(out-o); + state = hdr; + break; + } + /* match */ + if (sym >= 286) { + /* length codes 286 and 287 must not appear in compressed data */ + return (int)(out-o); + } + sym -= 257; {int len = sinfl__get(&s, lbits[sym]) + lbase[sym]; int dsym = sinfl_decode(&s, s.dsts, 8); int offs = sinfl__get(&s, dbits[dsym]) + dbase[dsym]; @@ -476,11 +496,17 @@ sinfl_decompress(unsigned char *out, int cap, const unsigned char *in, int size) #ifndef SINFL_NO_SIMD if (sinfl_likely(oe - out >= 16 * 3)) { if (offs >= 16) { - /* copy match */ + /* simd copy match */ sinfl_copy128(&dst, &src); sinfl_copy128(&dst, &src); do sinfl_copy128(&dst, &src); while (dst < out); + } else if (offs >= 8) { + /* word copy match */ + sinfl_copy64(&dst, &src); + sinfl_copy64(&dst, &src); + do sinfl_copy64(&dst, &src); + while (dst < out); } else if (offs == 1) { /* rle match copying */ sinfl_char16 w = sinfl_char16_char(src[0]); @@ -489,6 +515,7 @@ sinfl_decompress(unsigned char *out, int cap, const unsigned char *in, int size) do dst = sinfl_write128(dst, w); while (dst < out); } else { + /* byte copy match */ *dst++ = *src++; *dst++ = *src++; do *dst++ = *src++; @@ -498,7 +525,7 @@ sinfl_decompress(unsigned char *out, int cap, const unsigned char *in, int size) #else if (sinfl_likely(oe - out >= 3 * 8 - 3)) { if (offs >= 8) { - /* copy match */ + /* word copy match */ sinfl_copy64(&dst, &src); sinfl_copy64(&dst, &src); do sinfl_copy64(&dst, &src); @@ -513,6 +540,7 @@ sinfl_decompress(unsigned char *out, int cap, const unsigned char *in, int size) do dst = sinfl_write64(dst, w); while (dst < out); } else { + /* byte copy match */ *dst++ = *src++; *dst++ = *src++; do *dst++ = *src++; @@ -524,13 +552,8 @@ sinfl_decompress(unsigned char *out, int cap, const unsigned char *in, int size) *dst++ = *src++; *dst++ = *src++; do *dst++ = *src++; - while (dst < out);} - } - } else { - /* end of block */ - if (last) return (int)(out-o); - state = hdr; - break; + while (dst < out); + }} } } break;} }