· 5 years ago · Sep 04, 2020, 06:36 AM
1--[[
2 -------------------------------
3 --- INFORMATION ---
4 -------------------------------
5
6 Written by Haoqian He
7 Edited/Forked by Scarious
8
9 This is the forked version of the LibDeflate library by Haoqian He intended for luau. Credit for the original source code and such
10 goes to their respective creators (basic credits can be viewed under CREDITS, and more expansive credits/licensing info can be viewed
11 under LICENSING AND COPYRIGHT.
12
13 Original documentation can be viewed here: https://safeteewow.github.io/LibDeflate/source/LibDeflate.lua.html
14
15 You can access the LibDeflate library (and most LibDeflate methods seen in the original documentation) by using Compression.Library
16
17
18 -------------------------------
19 --- DOCUMENTATION ---
20 -------------------------------
21
22 Compression Methods:
23
24 Compression.Deflate.Compress(data, configs?)
25 Compression.Zlib.Compress(data, configs?)
26
27
28 Decompression Methods:
29
30 Compression.Deflate.Decompress(compressedData)
31 Compression.Zlib.Decompress(compressedData)
32
33
34 USAGE:
35
36 configs table:
37
38 {
39 level = 0; -- integer 0 -> 9 where 0 is no compression and 9 is most compression
40 strategy = "" -- "huffman_only", "fixed", "dynamic"
41 }
42
43 note :: the higher the level, the slower the compression will be
44 :: configs table is optional, if not supplied (aka nil) default level+strategy will be used
45
46
47 methods:
48
49 Method: Compression.Deflate.Compress(data, configs?):
50
51 Description: Compresses a string using the raw deflate format
52
53 Input:
54 - String: data = The data to be compressed
55 - table?: configs = The configuration table to control the compression
56
57 Output:
58 - String: compressedData = The compressed data
59 - int: paddedBits = The number of bits padded at the end of the output
60
61
62 Method: Compression.Deflate.Decompress(compressedData):
63
64 Description: Decompresses a raw deflate compressed data.
65
66 Input:
67 - String: compressedData = The data to be decompressed
68
69 Output:
70 - String: data = The decompressed data
71
72
73
74 Method: Compression.Zlib.Compress(data, configs?):
75
76 Description: Compresses a string using the zlib format
77
78 Input:
79 - String: data = The data to be compressed
80 - table?: configs = The configuration table to control the compression
81
82 Output:
83 - String: compressedData = The compressed data
84 - int: paddedBits = The number of bits padded at the end of the output
85
86
87 Method: Compression.Deflate.Decompress(compressedData):
88
89 Description: Decompresses a zlib compressed data.
90
91 Input:
92 - String: compressedData = The data to be decompressed
93
94 Output:
95 - String: data = The decompressed data
96
97
98 -------------------------------
99 --- CREDITS ---
100 -------------------------------
101
102 - LibDeflate Library: Haoqian He
103 - zlib: Jean-loup Gailly and Mark Adler
104 - puff: Mark Adler
105 - LibCompress: jjsheets and Galmok (WoW)
106 - 6bit encoding/decoding: WeakAuras2 (WoW)
107
108
109 -------------------------------
110 --- LICENSING AND COPYRIGHT ---
111 -------------------------------
112
113 LibDeflate 1.0.2-release <br>
114 Pure Lua compressor and decompressor with high compression ratio using
115 DEFLATE/zlib format.
116 @file LibDeflate.lua
117 @author Haoqian He (Github: SafeteeWoW; World of Warcraft: Safetyy-Illidan(US))
118 @copyright LibDeflate <2018-2020> Haoqian He
119 @license zlib License
120 This library is implemented according to the following specifications.
121 Report a bug if LibDeflate is not fully compliant with those specs.
122 Both compressors and decompressors have been implemented in the library.
123 1. RFC1950: DEFLATE Compressed Data Format Specification version 1.3
124 https://tools.ietf.org/html/rfc1951
125 2. RFC1951: ZLIB Compressed Data Format Specification version 3.3
126 https://tools.ietf.org/html/rfc1950
127
128
129 zlib License
130 (C) 2018-2020 Haoqian He
131 This software is provided 'as-is', without any express or implied
132 warranty. In no event will the authors be held liable for any damages
133 arising from the use of this software.
134 Permission is granted to anyone to use this software for any purpose,
135 including commercial applications, and to alter it and redistribute it
136 freely, subject to the following restrictions:
137 1. The origin of this software must not be misrepresented; you must not
138 claim that you wrote the original software. If you use this software
139 in a product, an acknowledgment in the product documentation would be
140 appreciated but is not required.
141 2. Altered source versions must be plainly marked as such, and must not be
142 misrepresented as being the original software.
143 3. This notice may not be removed or altered from any source distribution.
144 License History:
145 1. GNU General Public License Version 3 in v1.0.0 and earlier versions.
146 2. GNU Lesser General Public License Version 3 in v1.0.1
147 3. the zlib License since v1.0.2
148
149 Credits and Disclaimer:
150 This library rewrites the code from the algorithm
151 and the ideas of the following projects,
152 and uses their code to help to test the correctness of this library,
153 but their code is not included directly in the library itself.
154 Their original licenses shall be comply when used:
155 1. zlib, by Jean-loup Gailly (compression) and Mark Adler (decompression).
156 http://www.zlib.net/
157 Licensed under zlib License. http://www.zlib.net/zlib_license.html
158 For the compression algorithm.
159 2. puff, by Mark Adler. https://github.com/madler/zlib/tree/master/contrib/puff
160 Licensed under zlib License. http://www.zlib.net/zlib_license.html
161 For the decompression algorithm.
162 3. LibCompress, by jjsheets and Galmok of European Stormrage (Horde)
163 https://www.wowace.com/projects/libcompress
164 Licensed under GPLv2.
165 https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
166 For the code to create customized codec.
167 4. WeakAuras2,
168 https://github.com/WeakAuras/WeakAuras2
169 Licensed under GPLv2.
170 For the 6bit encoding and decoding.
171]]
172
173local Compression = {}
174local LibDeflate = {}
175
176Compression.Deflate = {}
177Compression.Zlib = {}
178Compression.Library = LibDeflate
179
180--[[
181 Method: Compression.Deflate.Compress
182
183 Description: Compresses a string using the raw deflate format
184
185 Input:
186 - String: data = The data to be compressed
187 - table?: configs = The configuration table to control the compression
188
189 Output:
190 - String: compressedData = The compressed data
191 - int: paddedBits = The number of bits padded at the end of the output
192
193 For more information see:
194 - LibDeflate:CompressDeflate
195 - compression_configs
196]]
197
198function Compression.Deflate.Compress(data, configs)
199 return LibDeflate:CompressDeflate(data)
200end
201
202
203--[[
204 Method: Compression.Deflate.Decompress
205
206 Description: Decompresses a raw deflate compressed data.
207
208 Input:
209 - String: compressedData = The data to be decompressed
210
211 Output:
212 - String: data = The decompressed data
213
214 For more information see:
215 - LibDeflate:DecompressDeflate
216 - compression_configs
217]]
218
219function Compression.Deflate.Decompress(compressedData)
220 return LibDeflate:DecompressDeflate(compressedData)
221end
222
223
224
225--[[
226 Method: Compression.Zlib.Compress
227
228 Description: Compresses a string using the zlib format
229
230 Input:
231 - String: data = The data to be compressed
232 - table?: configs = The configuration table to control the compression
233
234 Output:
235 - String: compressedData = The compressed data
236 - int: paddedBits = The number of bits padded at the end of the output
237
238 For more information see:
239 - LibDeflate:CompressZlib
240 - compression_configs
241]]
242
243function Compression.Zlib.Compress(data, configs)
244 return LibDeflate:CompressZlib(data, configs)
245end
246
247
248--[[
249 Method: Compression.Deflate.Decompress
250
251 Description: Decompresses a zlib compressed data.
252
253 Input:
254 - String: compressedData = The data to be decompressed
255
256 Output:
257 - String: data = The decompressed data
258
259 For more information see:
260 - LibDeflate:DecompressZlib
261 - compression_configs
262]]
263
264function Compression.Zlib.Decompress(compressedData)
265 return LibDeflate:DecompressZlib(compressedData)
266end
267
268
269
270
271
272--[[
273
274 LIBDEFLATE LIBRARY:
275
276]]
277
278
279do
280 -- Semantic version. all lowercase.
281 -- Suffix can be alpha1, alpha2, beta1, beta2, rc1, rc2, etc.
282 -- NOTE: Two version numbers needs to modify.
283 -- 1. On the top of LibDeflate.lua
284 -- 2. _VERSION
285 -- 3. _MINOR
286
287 -- version to store the official version of LibDeflate
288 local _VERSION = "1.0.2-release"
289
290 -- When MAJOR is changed, I should name it as LibDeflate2
291 local _MAJOR = "LibDeflate"
292
293 -- Update this whenever a new version, for LibStub version registration.
294 -- 0 : v0.x
295 -- 1 : v1.0.0
296 -- 2 : v1.0.1
297 -- 3 : v1.0.2
298 local _MINOR = 3
299
300 local _COPYRIGHT =
301 "LibDeflate ".._VERSION
302 .." Copyright (C) 2018-2020 Haoqian He."
303 .." Licensed under the zlib License"
304
305 -- Register in the World of Warcraft library "LibStub" if detected.
306 LibDeflate = {}
307
308 LibDeflate._VERSION = _VERSION
309 LibDeflate._MAJOR = _MAJOR
310 LibDeflate._MINOR = _MINOR
311 LibDeflate._COPYRIGHT = _COPYRIGHT
312end
313
314-- localize Lua api for faster access.
315local assert = assert
316local error = error
317local pairs = pairs
318local string_byte = string.byte
319local string_char = string.char
320local string_find = string.find
321local string_gsub = string.gsub
322local string_sub = string.sub
323local table_concat = table.concat
324local table_sort = table.sort
325local tostring = tostring
326local type = type
327
328-- Converts i to 2^i, (0<=i<=32)
329-- This is used to implement bit left shift and bit right shift.
330-- "x >> y" in C: "(x-x%_pow2[y])/_pow2[y]" in Lua
331-- "x << y" in C: "x*_pow2[y]" in Lua
332local _pow2 = {}
333
334-- Converts any byte to a character, (0<=byte<=255)
335local _byte_to_char = {}
336
337-- _reverseBitsTbl[len][val] stores the bit reverse of
338-- the number with bit length "len" and value "val"
339-- For example, decimal number 6 with bits length 5 is binary 00110
340-- It's reverse is binary 01100,
341-- which is decimal 12 and 12 == _reverseBitsTbl[5][6]
342-- 1<=len<=9, 0<=val<=2^len-1
343-- The reason for 1<=len<=9 is that the max of min bitlen of huffman code
344-- of a huffman alphabet is 9?
345local _reverse_bits_tbl = {}
346
347-- Convert a LZ77 length (3<=len<=258) to
348-- a deflate literal/LZ77_length code (257<=code<=285)
349local _length_to_deflate_code = {}
350
351-- convert a LZ77 length (3<=len<=258) to
352-- a deflate literal/LZ77_length code extra bits.
353local _length_to_deflate_extra_bits = {}
354
355-- Convert a LZ77 length (3<=len<=258) to
356-- a deflate literal/LZ77_length code extra bit length.
357local _length_to_deflate_extra_bitlen = {}
358
359-- Convert a small LZ77 distance (1<=dist<=256) to a deflate code.
360local _dist256_to_deflate_code = {}
361
362-- Convert a small LZ77 distance (1<=dist<=256) to
363-- a deflate distance code extra bits.
364local _dist256_to_deflate_extra_bits = {}
365
366-- Convert a small LZ77 distance (1<=dist<=256) to
367-- a deflate distance code extra bit length.
368local _dist256_to_deflate_extra_bitlen = {}
369
370-- Convert a literal/LZ77_length deflate code to LZ77 base length
371-- The key of the table is (code - 256), 257<=code<=285
372local _literal_deflate_code_to_base_len = {
373 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
374 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258,
375}
376
377-- Convert a literal/LZ77_length deflate code to base LZ77 length extra bits
378-- The key of the table is (code - 256), 257<=code<=285
379local _literal_deflate_code_to_extra_bitlen = {
380 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
381 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0,
382}
383
384-- Convert a distance deflate code to base LZ77 distance. (0<=code<=29)
385local _dist_deflate_code_to_base_dist = {
386 [0] = 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
387 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
388 8193, 12289, 16385, 24577,
389}
390
391-- Convert a distance deflate code to LZ77 bits length. (0<=code<=29)
392local _dist_deflate_code_to_extra_bitlen = {
393 [0] = 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
394 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
395}
396
397-- The code order of the first huffman header in the dynamic deflate block.
398-- See the page 12 of RFC1951
399local _rle_codes_huffman_bitlen_order = {16, 17, 18,
400 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15,
401}
402
403-- The following tables are used by fixed deflate block.
404-- The value of these tables are assigned at the bottom of the source.
405
406-- The huffman code of the literal/LZ77_length deflate codes,
407-- in fixed deflate block.
408local _fix_block_literal_huffman_code
409
410-- Convert huffman code of the literal/LZ77_length to deflate codes,
411-- in fixed deflate block.
412local _fix_block_literal_huffman_to_deflate_code
413
414-- The bit length of the huffman code of literal/LZ77_length deflate codes,
415-- in fixed deflate block.
416local _fix_block_literal_huffman_bitlen
417
418-- The count of each bit length of the literal/LZ77_length deflate codes,
419-- in fixed deflate block.
420local _fix_block_literal_huffman_bitlen_count
421
422-- The huffman code of the distance deflate codes,
423-- in fixed deflate block.
424local _fix_block_dist_huffman_code
425
426-- Convert huffman code of the distance to deflate codes,
427-- in fixed deflate block.
428local _fix_block_dist_huffman_to_deflate_code
429
430-- The bit length of the huffman code of the distance deflate codes,
431-- in fixed deflate block.
432local _fix_block_dist_huffman_bitlen
433
434-- The count of each bit length of the huffman code of
435-- the distance deflate codes,
436-- in fixed deflate block.
437local _fix_block_dist_huffman_bitlen_count
438
439for i = 0, 255 do
440 _byte_to_char[i] = string_char(i)
441end
442
443do
444 local pow = 1
445 for i = 0, 32 do
446 _pow2[i] = pow
447 pow = pow * 2
448 end
449end
450
451for i = 1, 9 do
452 _reverse_bits_tbl[i] = {}
453 for j=0, _pow2[i+1]-1 do
454 local reverse = 0
455 local value = j
456 for _ = 1, i do
457 -- The following line is equivalent to "res | (code %2)" in C.
458 reverse = reverse - reverse%2
459 + (((reverse%2==1) or (value % 2) == 1) and 1 or 0)
460 value = (value-value%2)/2
461 reverse = reverse * 2
462 end
463 _reverse_bits_tbl[i][j] = (reverse-reverse%2)/2
464 end
465end
466
467-- The source code is written according to the pattern in the numbers
468-- in RFC1951 Page10.
469do
470 local a = 18
471 local b = 16
472 local c = 265
473 local bitlen = 1
474 for len = 3, 258 do
475 if len <= 10 then
476 _length_to_deflate_code[len] = len + 254
477 _length_to_deflate_extra_bitlen[len] = 0
478 elseif len == 258 then
479 _length_to_deflate_code[len] = 285
480 _length_to_deflate_extra_bitlen[len] = 0
481 else
482 if len > a then
483 a = a + b
484 b = b * 2
485 c = c + 4
486 bitlen = bitlen + 1
487 end
488 local t = len-a-1+b/2
489 _length_to_deflate_code[len] = (t-(t%(b/8)))/(b/8) + c
490 _length_to_deflate_extra_bitlen[len] = bitlen
491 _length_to_deflate_extra_bits[len] = t % (b/8)
492 end
493 end
494end
495
496-- The source code is written according to the pattern in the numbers
497-- in RFC1951 Page11.
498do
499 _dist256_to_deflate_code[1] = 0
500 _dist256_to_deflate_code[2] = 1
501 _dist256_to_deflate_extra_bitlen[1] = 0
502 _dist256_to_deflate_extra_bitlen[2] = 0
503
504 local a = 3
505 local b = 4
506 local code = 2
507 local bitlen = 0
508 for dist = 3, 256 do
509 if dist > b then
510 a = a * 2
511 b = b * 2
512 code = code + 2
513 bitlen = bitlen + 1
514 end
515 _dist256_to_deflate_code[dist] = (dist <= a) and code or (code+1)
516 _dist256_to_deflate_extra_bitlen[dist] = (bitlen < 0) and 0 or bitlen
517 if b >= 8 then
518 _dist256_to_deflate_extra_bits[dist] = (dist-b/2-1) % (b/4)
519 end
520 end
521end
522
523--- Calculate the Adler-32 checksum of the string. <br>
524-- See RFC1950 Page 9 https://tools.ietf.org/html/rfc1950 for the
525-- definition of Adler-32 checksum.
526-- @param str [string] the input string to calcuate its Adler-32 checksum.
527-- @return [integer] The Adler-32 checksum, which is greater or equal to 0,
528-- and less than 2^32 (4294967296).
529function LibDeflate:Adler32(str)
530 -- This function is loop unrolled by better performance.
531 --
532 -- Here is the minimum code:
533 --
534 -- local a = 1
535 -- local b = 0
536 -- for i=1, #str do
537 -- local s = string.byte(str, i, i)
538 -- a = (a+s)%65521
539 -- b = (b+a)%65521
540 -- end
541 -- return b*65536+a
542 if type(str) ~= "string" then
543 error(("Usage: LibDeflate:Adler32(str):"
544 .." 'str' - string expected got '%s'."):format(type(str)), 2)
545 end
546 local strlen = #str
547
548 local i = 1
549 local a = 1
550 local b = 0
551 while i <= strlen - 15 do
552 local x1, x2, x3, x4, x5, x6, x7, x8,
553 x9, x10, x11, x12, x13, x14, x15, x16 = string_byte(str, i, i+15)
554 b = (b+16*a+16*x1+15*x2+14*x3+13*x4+12*x5+11*x6+10*x7+9*x8+8*x9
555 +7*x10+6*x11+5*x12+4*x13+3*x14+2*x15+x16)%65521
556 a = (a+x1+x2+x3+x4+x5+x6+x7+x8+x9+x10+x11+x12+x13+x14+x15+x16)%65521
557 i = i + 16
558 end
559 while (i <= strlen) do
560 local x = string_byte(str, i, i)
561 a = (a + x) % 65521
562 b = (b + a) % 65521
563 i = i + 1
564 end
565 return (b*65536+a) % 4294967296
566end
567
568-- Compare adler32 checksum.
569-- adler32 should be compared with a mod to avoid sign problem
570-- 4072834167 (unsigned) is the same adler32 as -222133129
571local function IsEqualAdler32(actual, expected)
572 return (actual % 4294967296) == (expected % 4294967296)
573end
574
575--- Create a preset dictionary.
576--
577-- This function is not fast, and the memory consumption of the produced
578-- dictionary is about 50 times of the input string. Therefore, it is suggestted
579-- to run this function only once in your program.
580--
581-- It is very important to know that if you do use a preset dictionary,
582-- compressors and decompressors MUST USE THE SAME dictionary. That is,
583-- dictionary must be created using the same string. If you update your program
584-- with a new dictionary, people with the old version won't be able to transmit
585-- data with people with the new version. Therefore, changing the dictionary
586-- must be very careful.
587--
588-- The parameters "strlen" and "adler32" add a layer of verification to ensure
589-- the parameter "str" is not modified unintentionally during the program
590-- development.
591--
592-- @usage local dict_str = "1234567890"
593--
594-- -- print(dict_str:len(), LibDeflate:Adler32(dict_str))
595-- -- Hardcode the print result below to verify it to avoid acciently
596-- -- modification of 'str' during the program development.
597-- -- string length: 10, Adler-32: 187433486,
598-- -- Don't calculate string length and its Adler-32 at run-time.
599--
600-- local dict = LibDeflate:CreateDictionary(dict_str, 10, 187433486)
601--
602-- @param str [string] The string used as the preset dictionary. <br>
603-- You should put stuffs that frequently appears in the dictionary
604-- string and preferablely put more frequently appeared stuffs toward the end
605-- of the string. <br>
606-- Empty string and string longer than 32768 bytes are not allowed.
607-- @param strlen [integer] The length of 'str'. Please pass in this parameter
608-- as a hardcoded constant, in order to verify the content of 'str'. The value
609-- of this parameter should be known before your program runs.
610-- @param adler32 [integer] The Adler-32 checksum of 'str'. Please pass in this
611-- parameter as a hardcoded constant, in order to verify the content of 'str'.
612-- The value of this parameter should be known before your program runs.
613-- @return [table] The dictionary used for preset dictionary compression and
614-- decompression.
615-- @raise error if 'strlen' does not match the length of 'str',
616-- or if 'adler32' does not match the Adler-32 checksum of 'str'.
617function LibDeflate:CreateDictionary(str, strlen, adler32)
618 if type(str) ~= "string" then
619 error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
620 .." 'str' - string expected got '%s'."):format(type(str)), 2)
621 end
622 if type(strlen) ~= "number" then
623 error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
624 .." 'strlen' - number expected got '%s'."):format(
625 type(strlen)), 2)
626 end
627 if type(adler32) ~= "number" then
628 error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
629 .." 'adler32' - number expected got '%s'."):format(
630 type(adler32)), 2)
631 end
632 if strlen ~= #str then
633 error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
634 .." 'strlen' does not match the actual length of 'str'."
635 .." 'strlen': %u, '#str': %u ."
636 .." Please check if 'str' is modified unintentionally.")
637 :format(strlen, #str))
638 end
639 if strlen == 0 then
640 error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
641 .." 'str' - Empty string is not allowed."), 2)
642 end
643 if strlen > 32768 then
644 error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
645 .." 'str' - string longer than 32768 bytes is not allowed."
646 .." Got %d bytes."):format(strlen), 2)
647 end
648 local actual_adler32 = self:Adler32(str)
649 if not IsEqualAdler32(adler32, actual_adler32) then
650 error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
651 .." 'adler32' does not match the actual adler32 of 'str'."
652 .." 'adler32': %u, 'Adler32(str)': %u ."
653 .." Please check if 'str' is modified unintentionally.")
654 :format(adler32, actual_adler32))
655 end
656
657 local dictionary = {}
658 dictionary.adler32 = adler32
659 dictionary.hash_tables = {}
660 dictionary.string_table = {}
661 dictionary.strlen = strlen
662 local string_table = dictionary.string_table
663 local hash_tables = dictionary.hash_tables
664 string_table[1] = string_byte(str, 1, 1)
665 string_table[2] = string_byte(str, 2, 2)
666 if strlen >= 3 then
667 local i = 1
668 local hash = string_table[1]*256+string_table[2]
669 while i <= strlen - 2 - 3 do
670 local x1, x2, x3, x4 = string_byte(str, i+2, i+5)
671 string_table[i+2] = x1
672 string_table[i+3] = x2
673 string_table[i+4] = x3
674 string_table[i+5] = x4
675 hash = (hash*256+x1)%16777216
676 local t = hash_tables[hash]
677 if not t then t = {}; hash_tables[hash] = t end
678 t[#t+1] = i-strlen
679 i = i + 1
680 hash = (hash*256+x2)%16777216
681 t = hash_tables[hash]
682 if not t then t = {}; hash_tables[hash] = t end
683 t[#t+1] = i-strlen
684 i = i + 1
685 hash = (hash*256+x3)%16777216
686 t = hash_tables[hash]
687 if not t then t = {}; hash_tables[hash] = t end
688 t[#t+1] = i-strlen
689 i = i + 1
690 hash = (hash*256+x4)%16777216
691 t = hash_tables[hash]
692 if not t then t = {}; hash_tables[hash] = t end
693 t[#t+1] = i-strlen
694 i = i + 1
695 end
696 while i <= strlen - 2 do
697 local x = string_byte(str, i+2)
698 string_table[i+2] = x
699 hash = (hash*256+x)%16777216
700 local t = hash_tables[hash]
701 if not t then t = {}; hash_tables[hash] = t end
702 t[#t+1] = i-strlen
703 i = i + 1
704 end
705 end
706 return dictionary
707end
708
709-- Check if the dictionary is valid.
710-- @param dictionary The preset dictionary for compression and decompression.
711-- @return true if valid, false if not valid.
712-- @return if not valid, the error message.
713local function IsValidDictionary(dictionary)
714 if type(dictionary) ~= "table" then
715 return false, ("'dictionary' - table expected got '%s'.")
716 :format(type(dictionary))
717 end
718 if type(dictionary.adler32) ~= "number"
719 or type(dictionary.string_table) ~= "table"
720 or type(dictionary.strlen) ~= "number"
721 or dictionary.strlen <= 0
722 or dictionary.strlen > 32768
723 or dictionary.strlen ~= #dictionary.string_table
724 or type(dictionary.hash_tables) ~= "table"
725 then
726 return false, ("'dictionary' - corrupted dictionary.")
727 :format(type(dictionary))
728 end
729 return true, ""
730end
731
732--[[
733 key of the configuration table is the compression level,
734 and its value stores the compression setting.
735 These numbers come from zlib source code.
736 Higher compression level usually means better compression.
737 (Because LibDeflate uses a simplified version of zlib algorithm,
738 there is no guarantee that higher compression level does not create
739 bigger file than lower level, but I can say it's 99% likely)
740 Be careful with the high compression level. This is a pure lua
741 implementation compressor/decompressor, which is significant slower than
742 a C/C++ equivalant compressor/decompressor. Very high compression level
743 costs significant more CPU time, and usually compression size won't be
744 significant smaller when you increase compression level by 1, when the
745 level is already very high. Benchmark yourself if you can afford it.
746 See also https://github.com/madler/zlib/blob/master/doc/algorithm.txt,
747 https://github.com/madler/zlib/blob/master/deflate.c for more information.
748 The meaning of each field:
749 @field 1 use_lazy_evaluation:
750 true/false. Whether the program uses lazy evaluation.
751 See what is "lazy evaluation" in the link above.
752 lazy_evaluation improves ratio, but relatively slow.
753 @field 2 good_prev_length:
754 Only effective if lazy is set, Only use 1/4 of max_chain,
755 if prev length of lazy match is above this.
756 @field 3 max_insert_length/max_lazy_match:
757 If not using lazy evaluation,
758 insert new strings in the hash table only if the match length is not
759 greater than this length.
760 If using lazy evaluation, only continue lazy evaluation,
761 if previous match length is strictly smaller than this value.
762 @field 4 nice_length:
763 Number. Don't continue to go down the hash chain,
764 if match length is above this.
765 @field 5 max_chain:
766 Number. The maximum number of hash chains we look.
767--]]
768local _compression_level_configs = {
769 [0] = {false, nil, 0, 0, 0}, -- level 0, no compression
770 [1] = {false, nil, 4, 8, 4}, -- level 1, similar to zlib level 1
771 [2] = {false, nil, 5, 18, 8}, -- level 2, similar to zlib level 2
772 [3] = {false, nil, 6, 32, 32}, -- level 3, similar to zlib level 3
773 [4] = {true, 4, 4, 16, 16}, -- level 4, similar to zlib level 4
774 [5] = {true, 8, 16, 32, 32}, -- level 5, similar to zlib level 5
775 [6] = {true, 8, 16, 128, 128}, -- level 6, similar to zlib level 6
776 [7] = {true, 8, 32, 128, 256}, -- (SLOW) level 7, similar to zlib level 7
777 [8] = {true, 32, 128, 258, 1024} , --(SLOW) level 8,similar to zlib level 8
778 [9] = {true, 32, 258, 258, 4096},
779 -- (VERY SLOW) level 9, similar to zlib level 9
780}
781
782-- Check if the compression/decompression arguments is valid
783-- @param str The input string.
784-- @param check_dictionary if true, check if dictionary is valid.
785-- @param dictionary The preset dictionary for compression and decompression.
786-- @param check_configs if true, check if config is valid.
787-- @param configs The compression configuration table
788-- @return true if valid, false if not valid.
789-- @return if not valid, the error message.
790local function IsValidArguments(str,
791 check_dictionary, dictionary,
792 check_configs, configs)
793
794 if type(str) ~= "string" then
795 return false,
796 ("'str' - string expected got '%s'."):format(type(str))
797 end
798 if check_dictionary then
799 local dict_valid, dict_err = IsValidDictionary(dictionary)
800 if not dict_valid then
801 return false, dict_err
802 end
803 end
804 if check_configs then
805 local type_configs = type(configs)
806 if type_configs ~= "nil" and type_configs ~= "table" then
807 return false,
808 ("'configs' - nil or table expected got '%s'.")
809 :format(type(configs))
810 end
811 if type_configs == "table" then
812 for k, v in pairs(configs) do
813 if k ~= "level" and k ~= "strategy" then
814 return false,
815 ("'configs' - unsupported table key in the configs: '%s'.")
816 :format(k)
817 elseif k == "level" and not _compression_level_configs[v] then
818 return false,
819 ("'configs' - unsupported 'level': %s."):format(tostring(v))
820 elseif k == "strategy" and v ~= "fixed" and v ~= "huffman_only"
821 and v ~= "dynamic" then
822 -- random_block_type is for testing purpose
823 return false, ("'configs' - unsupported 'strategy': '%s'.")
824 :format(tostring(v))
825 end
826 end
827 end
828 end
829 return true, ""
830end
831
832
833
834--[[ --------------------------------------------------------------------------
835 Compress code
836--]] --------------------------------------------------------------------------
837
838-- partial flush to save memory
839local _FLUSH_MODE_MEMORY_CLEANUP = 0
840-- full flush with partial bytes
841local _FLUSH_MODE_OUTPUT = 1
842-- write bytes to get to byte boundary
843local _FLUSH_MODE_BYTE_BOUNDARY = 2
844-- no flush, just get num of bits written so far
845local _FLUSH_MODE_NO_FLUSH = 3
846
847--[[
848 Create an empty writer to easily write stuffs as the unit of bits.
849 Return values:
850 1. WriteBits(code, bitlen):
851 2. WriteString(str):
852 3. Flush(mode):
853--]]
854local function CreateWriter()
855 local buffer_size = 0
856 local cache = 0
857 local cache_bitlen = 0
858 local total_bitlen = 0
859 local buffer = {}
860 -- When buffer is big enough, flush into result_buffer to save memory.
861 local result_buffer = {}
862
863 -- Write bits with value "value" and bit length of "bitlen" into writer.
864 -- @param value: The value being written
865 -- @param bitlen: The bit length of "value"
866 -- @return nil
867 local function WriteBits(value, bitlen)
868 cache = cache + value * _pow2[cache_bitlen]
869 cache_bitlen = cache_bitlen + bitlen
870 total_bitlen = total_bitlen + bitlen
871 -- Only bulk to buffer every 4 bytes. This is quicker.
872 if cache_bitlen >= 32 then
873 buffer_size = buffer_size + 1
874 buffer[buffer_size] =
875 _byte_to_char[cache % 256]
876 .._byte_to_char[((cache-cache%256)/256 % 256)]
877 .._byte_to_char[((cache-cache%65536)/65536 % 256)]
878 .._byte_to_char[((cache-cache%16777216)/16777216 % 256)]
879 local rshift_mask = _pow2[32 - cache_bitlen + bitlen]
880 cache = (value - value%rshift_mask)/rshift_mask
881 cache_bitlen = cache_bitlen - 32
882 end
883 end
884
885 -- Write the entire string into the writer.
886 -- @param str The string being written
887 -- @return nil
888 local function WriteString(str)
889 for _ = 1, cache_bitlen, 8 do
890 buffer_size = buffer_size + 1
891 buffer[buffer_size] = string_char(cache % 256)
892 cache = (cache-cache%256)/256
893 end
894 cache_bitlen = 0
895 buffer_size = buffer_size + 1
896 buffer[buffer_size] = str
897 total_bitlen = total_bitlen + #str*8
898 end
899
900 -- Flush current stuffs in the writer and return it.
901 -- This operation will free most of the memory.
902 -- @param mode See the descrtion of the constant and the source code.
903 -- @return The total number of bits stored in the writer right now.
904 -- for byte boundary mode, it includes the padding bits.
905 -- for output mode, it does not include padding bits.
906 -- @return Return the outputs if mode is output.
907 local function FlushWriter(mode)
908 if mode == _FLUSH_MODE_NO_FLUSH then
909 return total_bitlen
910 end
911
912 if mode == _FLUSH_MODE_OUTPUT
913 or mode == _FLUSH_MODE_BYTE_BOUNDARY then
914 -- Full flush, also output cache.
915 -- Need to pad some bits if cache_bitlen is not multiple of 8.
916 local padding_bitlen = (8 - cache_bitlen % 8) % 8
917
918 if cache_bitlen > 0 then
919 -- padding with all 1 bits, mainly because "\000" is not
920 -- good to be tranmitted. I do this so "\000" is a little bit
921 -- less frequent.
922 cache = cache - _pow2[cache_bitlen]
923 + _pow2[cache_bitlen+padding_bitlen]
924 for _ = 1, cache_bitlen, 8 do
925 buffer_size = buffer_size + 1
926 buffer[buffer_size] = _byte_to_char[cache % 256]
927 cache = (cache-cache%256)/256
928 end
929
930 cache = 0
931 cache_bitlen = 0
932 end
933 if mode == _FLUSH_MODE_BYTE_BOUNDARY then
934 total_bitlen = total_bitlen + padding_bitlen
935 return total_bitlen
936 end
937 end
938
939 local flushed = table_concat(buffer)
940 buffer = {}
941 buffer_size = 0
942 result_buffer[#result_buffer+1] = flushed
943
944 if mode == _FLUSH_MODE_MEMORY_CLEANUP then
945 return total_bitlen
946 else
947 return total_bitlen, table_concat(result_buffer)
948 end
949 end
950
951 return WriteBits, WriteString, FlushWriter
952end
953
954-- Push an element into a max heap
955-- @param heap A max heap whose max element is at index 1.
956-- @param e The element to be pushed. Assume element "e" is a table
957-- and comparison is done via its first entry e[1]
958-- @param heap_size current number of elements in the heap.
959-- NOTE: There may be some garbage stored in
960-- heap[heap_size+1], heap[heap_size+2], etc..
961-- @return nil
962local function MinHeapPush(heap, e, heap_size)
963 heap_size = heap_size + 1
964 heap[heap_size] = e
965 local value = e[1]
966 local pos = heap_size
967 local parent_pos = (pos-pos%2)/2
968
969 while (parent_pos >= 1 and heap[parent_pos][1] > value) do
970 local t = heap[parent_pos]
971 heap[parent_pos] = e
972 heap[pos] = t
973 pos = parent_pos
974 parent_pos = (parent_pos-parent_pos%2)/2
975 end
976end
977
978-- Pop an element from a max heap
979-- @param heap A max heap whose max element is at index 1.
980-- @param heap_size current number of elements in the heap.
981-- @return the poped element
982-- Note: This function does not change table size of "heap" to save CPU time.
983local function MinHeapPop(heap, heap_size)
984 local top = heap[1]
985 local e = heap[heap_size]
986 local value = e[1]
987 heap[1] = e
988 heap[heap_size] = top
989 heap_size = heap_size - 1
990
991 local pos = 1
992 local left_child_pos = pos * 2
993 local right_child_pos = left_child_pos + 1
994
995 while (left_child_pos <= heap_size) do
996 local left_child = heap[left_child_pos]
997 if (right_child_pos <= heap_size
998 and heap[right_child_pos][1] < left_child[1]) then
999 local right_child = heap[right_child_pos]
1000 if right_child[1] < value then
1001 heap[right_child_pos] = e
1002 heap[pos] = right_child
1003 pos = right_child_pos
1004 left_child_pos = pos * 2
1005 right_child_pos = left_child_pos + 1
1006 else
1007 break
1008 end
1009 else
1010 if left_child[1] < value then
1011 heap[left_child_pos] = e
1012 heap[pos] = left_child
1013 pos = left_child_pos
1014 left_child_pos = pos * 2
1015 right_child_pos = left_child_pos + 1
1016 else
1017 break
1018 end
1019 end
1020 end
1021
1022 return top
1023end
1024
1025-- Deflate defines a special huffman tree, which is unique once the bit length
1026-- of huffman code of all symbols are known.
1027-- @param bitlen_count Number of symbols with a specific bitlen
1028-- @param symbol_bitlen The bit length of a symbol
1029-- @param max_symbol The max symbol among all symbols,
1030-- which is (number of symbols - 1)
1031-- @param max_bitlen The max huffman bit length among all symbols.
1032-- @return The huffman code of all symbols.
1033local function GetHuffmanCodeFromBitlen(bitlen_counts, symbol_bitlens
1034 , max_symbol, max_bitlen)
1035 local huffman_code = 0
1036 local next_codes = {}
1037 local symbol_huffman_codes = {}
1038 for bitlen = 1, max_bitlen do
1039 huffman_code = (huffman_code+(bitlen_counts[bitlen-1] or 0))*2
1040 next_codes[bitlen] = huffman_code
1041 end
1042 for symbol = 0, max_symbol do
1043 local bitlen = symbol_bitlens[symbol]
1044 if bitlen then
1045 huffman_code = next_codes[bitlen]
1046 next_codes[bitlen] = huffman_code + 1
1047
1048 -- Reverse the bits of huffman code,
1049 -- because most signifant bits of huffman code
1050 -- is stored first into the compressed data.
1051 -- @see RFC1951 Page5 Section 3.1.1
1052 if bitlen <= 9 then -- Have cached reverse for small bitlen.
1053 symbol_huffman_codes[symbol] =
1054 _reverse_bits_tbl[bitlen][huffman_code]
1055 else
1056 local reverse = 0
1057 for _ = 1, bitlen do
1058 reverse = reverse - reverse%2
1059 + (((reverse%2==1)
1060 or (huffman_code % 2) == 1) and 1 or 0)
1061 huffman_code = (huffman_code-huffman_code%2)/2
1062 reverse = reverse*2
1063 end
1064 symbol_huffman_codes[symbol] = (reverse-reverse%2)/2
1065 end
1066 end
1067 end
1068 return symbol_huffman_codes
1069end
1070
1071-- A helper function to sort heap elements
1072-- a[1], b[1] is the huffman frequency
1073-- a[2], b[2] is the symbol value.
1074local function SortByFirstThenSecond(a, b)
1075 return a[1] < b[1] or
1076 (a[1] == b[1] and a[2] < b[2])
1077end
1078
1079-- Calculate the huffman bit length and huffman code.
1080-- @param symbol_count: A table whose table key is the symbol, and table value
1081-- is the symbol frenquency (nil means 0 frequency).
1082-- @param max_bitlen: See description of return value.
1083-- @param max_symbol: The maximum symbol
1084-- @return a table whose key is the symbol, and the value is the huffman bit
1085-- bit length. We guarantee that all bit length <= max_bitlen.
1086-- For 0<=symbol<=max_symbol, table value could be nil if the frequency
1087-- of the symbol is 0 or nil.
1088-- @return a table whose key is the symbol, and the value is the huffman code.
1089-- @return a number indicating the maximum symbol whose bitlen is not 0.
1090local function GetHuffmanBitlenAndCode(symbol_counts, max_bitlen, max_symbol)
1091 local heap_size
1092 local max_non_zero_bitlen_symbol = -1
1093 local leafs = {}
1094 local heap = {}
1095 local symbol_bitlens = {}
1096 local symbol_codes = {}
1097 local bitlen_counts = {}
1098
1099 --[[
1100 tree[1]: weight, temporarily used as parent and bitLengths
1101 tree[2]: symbol
1102 tree[3]: left child
1103 tree[4]: right child
1104 --]]
1105 local number_unique_symbols = 0
1106 for symbol, count in pairs(symbol_counts) do
1107 number_unique_symbols = number_unique_symbols + 1
1108 leafs[number_unique_symbols] = {count, symbol}
1109 end
1110
1111 if (number_unique_symbols == 0) then
1112 -- no code.
1113 return {}, {}, -1
1114 elseif (number_unique_symbols == 1) then
1115 -- Only one code. In this case, its huffman code
1116 -- needs to be assigned as 0, and bit length is 1.
1117 -- This is the only case that the return result
1118 -- represents an imcomplete huffman tree.
1119 local symbol = leafs[1][2]
1120 symbol_bitlens[symbol] = 1
1121 symbol_codes[symbol] = 0
1122 return symbol_bitlens, symbol_codes, symbol
1123 else
1124 table_sort(leafs, SortByFirstThenSecond)
1125 heap_size = number_unique_symbols
1126 for i = 1, heap_size do
1127 heap[i] = leafs[i]
1128 end
1129
1130 while (heap_size > 1) do
1131 -- Note: pop does not change table size of heap
1132 local leftChild = MinHeapPop(heap, heap_size)
1133 heap_size = heap_size - 1
1134 local rightChild = MinHeapPop(heap, heap_size)
1135 heap_size = heap_size - 1
1136 local newNode =
1137 {leftChild[1]+rightChild[1], -1, leftChild, rightChild}
1138 MinHeapPush(heap, newNode, heap_size)
1139 heap_size = heap_size + 1
1140 end
1141
1142 -- Number of leafs whose bit length is greater than max_len.
1143 local number_bitlen_overflow = 0
1144
1145 -- Calculate bit length of all nodes
1146 local fifo = {heap[1], 0, 0, 0} -- preallocate some spaces.
1147 local fifo_size = 1
1148 local index = 1
1149 heap[1][1] = 0
1150 while (index <= fifo_size) do -- Breath first search
1151 local e = fifo[index]
1152 local bitlen = e[1]
1153 local symbol = e[2]
1154 local left_child = e[3]
1155 local right_child = e[4]
1156 if left_child then
1157 fifo_size = fifo_size + 1
1158 fifo[fifo_size] = left_child
1159 left_child[1] = bitlen + 1
1160 end
1161 if right_child then
1162 fifo_size = fifo_size + 1
1163 fifo[fifo_size] = right_child
1164 right_child[1] = bitlen + 1
1165 end
1166 index = index + 1
1167
1168 if (bitlen > max_bitlen) then
1169 number_bitlen_overflow = number_bitlen_overflow + 1
1170 bitlen = max_bitlen
1171 end
1172 if symbol >= 0 then
1173 symbol_bitlens[symbol] = bitlen
1174 max_non_zero_bitlen_symbol =
1175 (symbol > max_non_zero_bitlen_symbol)
1176 and symbol or max_non_zero_bitlen_symbol
1177 bitlen_counts[bitlen] = (bitlen_counts[bitlen] or 0) + 1
1178 end
1179 end
1180
1181 -- Resolve bit length overflow
1182 -- @see ZLib/trees.c:gen_bitlen(s, desc), for reference
1183 if (number_bitlen_overflow > 0) then
1184 repeat
1185 local bitlen = max_bitlen - 1
1186 while ((bitlen_counts[bitlen] or 0) == 0) do
1187 bitlen = bitlen - 1
1188 end
1189 -- move one leaf down the tree
1190 bitlen_counts[bitlen] = bitlen_counts[bitlen] - 1
1191 -- move one overflow item as its brother
1192 bitlen_counts[bitlen+1] = (bitlen_counts[bitlen+1] or 0) + 2
1193 bitlen_counts[max_bitlen] = bitlen_counts[max_bitlen] - 1
1194 number_bitlen_overflow = number_bitlen_overflow - 2
1195 until (number_bitlen_overflow <= 0)
1196
1197 index = 1
1198 for bitlen = max_bitlen, 1, -1 do
1199 local n = bitlen_counts[bitlen] or 0
1200 while (n > 0) do
1201 local symbol = leafs[index][2]
1202 symbol_bitlens[symbol] = bitlen
1203 n = n - 1
1204 index = index + 1
1205 end
1206 end
1207 end
1208
1209 symbol_codes = GetHuffmanCodeFromBitlen(bitlen_counts, symbol_bitlens,
1210 max_symbol, max_bitlen)
1211 return symbol_bitlens, symbol_codes, max_non_zero_bitlen_symbol
1212 end
1213end
1214
1215-- Calculate the first huffman header in the dynamic huffman block
1216-- @see RFC1951 Page 12
1217-- @param lcode_bitlen: The huffman bit length of literal/LZ77_length.
1218-- @param max_non_zero_bitlen_lcode: The maximum literal/LZ77_length symbol
1219-- whose huffman bit length is not zero.
1220-- @param dcode_bitlen: The huffman bit length of LZ77 distance.
1221-- @param max_non_zero_bitlen_dcode: The maximum LZ77 distance symbol
1222-- whose huffman bit length is not zero.
1223-- @return The run length encoded codes.
1224-- @return The extra bits. One entry for each rle code that needs extra bits.
1225-- (code == 16 or 17 or 18).
1226-- @return The count of appearance of each rle codes.
1227local function RunLengthEncodeHuffmanBitlen(
1228 lcode_bitlens,
1229 max_non_zero_bitlen_lcode,
1230 dcode_bitlens,
1231 max_non_zero_bitlen_dcode)
1232 local rle_code_tblsize = 0
1233 local rle_codes = {}
1234 local rle_code_counts = {}
1235 local rle_extra_bits_tblsize = 0
1236 local rle_extra_bits = {}
1237 local prev = nil
1238 local count = 0
1239
1240 -- If there is no distance code, assume one distance code of bit length 0.
1241 -- RFC1951: One distance code of zero bits means that
1242 -- there are no distance codes used at all (the data is all literals).
1243 max_non_zero_bitlen_dcode = (max_non_zero_bitlen_dcode < 0)
1244 and 0 or max_non_zero_bitlen_dcode
1245 local max_code = max_non_zero_bitlen_lcode+max_non_zero_bitlen_dcode+1
1246
1247 for code = 0, max_code+1 do
1248 local len = (code <= max_non_zero_bitlen_lcode)
1249 and (lcode_bitlens[code] or 0)
1250 or ((code <= max_code)
1251 and (dcode_bitlens[code-max_non_zero_bitlen_lcode-1] or 0) or nil)
1252 if len == prev then
1253 count = count + 1
1254 if len ~= 0 and count == 6 then
1255 rle_code_tblsize = rle_code_tblsize + 1
1256 rle_codes[rle_code_tblsize] = 16
1257 rle_extra_bits_tblsize = rle_extra_bits_tblsize + 1
1258 rle_extra_bits[rle_extra_bits_tblsize] = 3
1259 rle_code_counts[16] = (rle_code_counts[16] or 0) + 1
1260 count = 0
1261 elseif len == 0 and count == 138 then
1262 rle_code_tblsize = rle_code_tblsize + 1
1263 rle_codes[rle_code_tblsize] = 18
1264 rle_extra_bits_tblsize = rle_extra_bits_tblsize + 1
1265 rle_extra_bits[rle_extra_bits_tblsize] = 127
1266 rle_code_counts[18] = (rle_code_counts[18] or 0) + 1
1267 count = 0
1268 end
1269 else
1270 if count == 1 then
1271 rle_code_tblsize = rle_code_tblsize + 1
1272 rle_codes[rle_code_tblsize] = prev
1273 rle_code_counts[prev] = (rle_code_counts[prev] or 0) + 1
1274 elseif count == 2 then
1275 rle_code_tblsize = rle_code_tblsize + 1
1276 rle_codes[rle_code_tblsize] = prev
1277 rle_code_tblsize = rle_code_tblsize + 1
1278 rle_codes[rle_code_tblsize] = prev
1279 rle_code_counts[prev] = (rle_code_counts[prev] or 0) + 2
1280 elseif count >= 3 then
1281 rle_code_tblsize = rle_code_tblsize + 1
1282 local rleCode = (prev ~= 0) and 16 or (count <= 10 and 17 or 18)
1283 rle_codes[rle_code_tblsize] = rleCode
1284 rle_code_counts[rleCode] = (rle_code_counts[rleCode] or 0) + 1
1285 rle_extra_bits_tblsize = rle_extra_bits_tblsize + 1
1286 rle_extra_bits[rle_extra_bits_tblsize] =
1287 (count <= 10) and (count - 3) or (count - 11)
1288 end
1289
1290 prev = len
1291 if len and len ~= 0 then
1292 rle_code_tblsize = rle_code_tblsize + 1
1293 rle_codes[rle_code_tblsize] = len
1294 rle_code_counts[len] = (rle_code_counts[len] or 0) + 1
1295 count = 0
1296 else
1297 count = 1
1298 end
1299 end
1300 end
1301
1302 return rle_codes, rle_extra_bits, rle_code_counts
1303end
1304
1305-- Load the string into a table, in order to speed up LZ77.
1306-- Loop unrolled 16 times to speed this function up.
1307-- @param str The string to be loaded.
1308-- @param t The load destination
1309-- @param start str[index] will be the first character to be loaded.
1310-- @param end str[index] will be the last character to be loaded
1311-- @param offset str[index] will be loaded into t[index-offset]
1312-- @return t
1313local function LoadStringToTable(str, t, start, stop, offset)
1314 local i = start - offset
1315 while i <= stop - 15 - offset do
1316 t[i], t[i+1], t[i+2], t[i+3], t[i+4], t[i+5], t[i+6], t[i+7], t[i+8],
1317 t[i+9], t[i+10], t[i+11], t[i+12], t[i+13], t[i+14], t[i+15] =
1318 string_byte(str, i + offset, i + 15 + offset)
1319 i = i + 16
1320 end
1321 while (i <= stop - offset) do
1322 t[i] = string_byte(str, i + offset, i + offset)
1323 i = i + 1
1324 end
1325 return t
1326end
1327
1328-- Do LZ77 process. This function uses the majority of the CPU time.
1329-- @see zlib/deflate.c:deflate_fast(), zlib/deflate.c:deflate_slow()
1330-- @see https://github.com/madler/zlib/blob/master/doc/algorithm.txt
1331-- This function uses the algorithms used above. You should read the
1332-- algorithm.txt above to understand what is the hash function and the
1333-- lazy evaluation.
1334--
1335-- The special optimization used here is hash functions used here.
1336-- The hash function is just the multiplication of the three consective
1337-- characters. So if the hash matches, it guarantees 3 characters are matched.
1338-- This optimization can be implemented because Lua table is a hash table.
1339--
1340-- @param level integer that describes compression level.
1341-- @param string_table table that stores the value of string to be compressed.
1342-- The index of this table starts from 1.
1343-- The caller needs to make sure all values needed by this function
1344-- are loaded.
1345-- Assume "str" is the origin input string into the compressor
1346-- str[block_start]..str[block_end+3] needs to be loaded into
1347-- string_table[block_start-offset]..string_table[block_end-offset]
1348-- If dictionary is presented, the last 258 bytes of the dictionary
1349-- needs to be loaded into sing_table[-257..0]
1350-- (See more in the description of offset.)
1351-- @param hash_tables. The table key is the hash value (0<=hash<=16777216=256^3)
1352-- The table value is an array0 that stores the indexes of the
1353-- input data string to be compressed, such that
1354-- hash == str[index]*str[index+1]*str[index+2]
1355-- Indexes are ordered in this array.
1356-- @param block_start The indexes of the input data string to be compressed.
1357-- that starts the LZ77 block.
1358-- @param block_end The indexes of the input data string to be compressed.
1359-- that stores the LZ77 block.
1360-- @param offset str[index] is stored in string_table[index-offset],
1361-- This offset is mainly an optimization to limit the index
1362-- of string_table, so lua can access this table quicker.
1363-- @param dictionary See LibDeflate:CreateDictionary
1364-- @return literal/LZ77_length deflate codes.
1365-- @return the extra bits of literal/LZ77_length deflate codes.
1366-- @return the count of each literal/LZ77 deflate code.
1367-- @return LZ77 distance deflate codes.
1368-- @return the extra bits of LZ77 distance deflate codes.
1369-- @return the count of each LZ77 distance deflate code.
1370local function GetBlockLZ77Result(level, string_table, hash_tables, block_start,
1371 block_end, offset, dictionary)
1372 local config = _compression_level_configs[level]
1373 local config_use_lazy
1374 , config_good_prev_length
1375 , config_max_lazy_match
1376 , config_nice_length
1377 , config_max_hash_chain =
1378 config[1], config[2], config[3], config[4], config[5]
1379
1380 local config_max_insert_length = (not config_use_lazy)
1381 and config_max_lazy_match or 2147483646
1382 local config_good_hash_chain =
1383 (config_max_hash_chain-config_max_hash_chain%4/4)
1384
1385 local hash
1386
1387 local dict_hash_tables
1388 local dict_string_table
1389 local dict_string_len = 0
1390
1391 if dictionary then
1392 dict_hash_tables = dictionary.hash_tables
1393 dict_string_table = dictionary.string_table
1394 dict_string_len = dictionary.strlen
1395 assert(block_start == 1)
1396 if block_end >= block_start and dict_string_len >= 2 then
1397 hash = dict_string_table[dict_string_len-1]*65536
1398 + dict_string_table[dict_string_len]*256 + string_table[1]
1399 local t = hash_tables[hash]
1400 if not t then t = {}; hash_tables[hash] = t end
1401 t[#t+1] = -1
1402 end
1403 if block_end >= block_start+1 and dict_string_len >= 1 then
1404 hash = dict_string_table[dict_string_len]*65536
1405 + string_table[1]*256 + string_table[2]
1406 local t = hash_tables[hash]
1407 if not t then t = {}; hash_tables[hash] = t end
1408 t[#t+1] = 0
1409 end
1410 end
1411
1412 local dict_string_len_plus3 = dict_string_len + 3
1413
1414 hash = (string_table[block_start-offset] or 0)*256
1415 + (string_table[block_start+1-offset] or 0)
1416
1417 local lcodes = {}
1418 local lcode_tblsize = 0
1419 local lcodes_counts = {}
1420 local dcodes = {}
1421 local dcodes_tblsize = 0
1422 local dcodes_counts = {}
1423
1424 local lextra_bits = {}
1425 local lextra_bits_tblsize = 0
1426 local dextra_bits = {}
1427 local dextra_bits_tblsize = 0
1428
1429 local match_available = false
1430 local prev_len
1431 local prev_dist
1432 local cur_len = 0
1433 local cur_dist = 0
1434
1435 local index = block_start
1436 local index_end = block_end + (config_use_lazy and 1 or 0)
1437
1438 -- the zlib source code writes separate code for lazy evaluation and
1439 -- not lazy evaluation, which is easier to understand.
1440 -- I put them together, so it is a bit harder to understand.
1441 -- because I think this is easier for me to maintain it.
1442 while (index <= index_end) do
1443 local string_table_index = index - offset
1444 local offset_minus_three = offset - 3
1445 prev_len = cur_len
1446 prev_dist = cur_dist
1447 cur_len = 0
1448
1449 hash = (hash*256+(string_table[string_table_index+2] or 0))%16777216
1450
1451 local chain_index
1452 local cur_chain
1453 local hash_chain = hash_tables[hash]
1454 local chain_old_size
1455 if not hash_chain then
1456 chain_old_size = 0
1457 hash_chain = {}
1458 hash_tables[hash] = hash_chain
1459 if dict_hash_tables then
1460 cur_chain = dict_hash_tables[hash]
1461 chain_index = cur_chain and #cur_chain or 0
1462 else
1463 chain_index = 0
1464 end
1465 else
1466 chain_old_size = #hash_chain
1467 cur_chain = hash_chain
1468 chain_index = chain_old_size
1469 end
1470
1471 if index <= block_end then
1472 hash_chain[chain_old_size+1] = index
1473 end
1474
1475 if (chain_index > 0 and index + 2 <= block_end
1476 and (not config_use_lazy or prev_len < config_max_lazy_match)) then
1477
1478 local depth =
1479 (config_use_lazy and prev_len >= config_good_prev_length)
1480 and config_good_hash_chain or config_max_hash_chain
1481
1482 local max_len_minus_one = block_end - index
1483 max_len_minus_one = (max_len_minus_one >= 257) and 257 or max_len_minus_one
1484 max_len_minus_one = max_len_minus_one + string_table_index
1485 local string_table_index_plus_three = string_table_index + 3
1486
1487 while chain_index >= 1 and depth > 0 do
1488 local prev = cur_chain[chain_index]
1489
1490 if index - prev > 32768 then
1491 break
1492 end
1493 if prev < index then
1494 local sj = string_table_index_plus_three
1495
1496 if prev >= -257 then
1497 local pj = prev - offset_minus_three
1498 while (sj <= max_len_minus_one
1499 and string_table[pj]
1500 == string_table[sj]) do
1501 sj = sj + 1
1502 pj = pj + 1
1503 end
1504 else
1505 local pj = dict_string_len_plus3 + prev
1506 while (sj <= max_len_minus_one
1507 and dict_string_table[pj]
1508 == string_table[sj]) do
1509 sj = sj + 1
1510 pj = pj + 1
1511 end
1512 end
1513 local j = sj - string_table_index
1514 if j > cur_len then
1515 cur_len = j
1516 cur_dist = index - prev
1517 end
1518 if cur_len >= config_nice_length then
1519 break
1520 end
1521 end
1522
1523 chain_index = chain_index - 1
1524 depth = depth - 1
1525 if chain_index == 0 and prev > 0 and dict_hash_tables then
1526 cur_chain = dict_hash_tables[hash]
1527 chain_index = cur_chain and #cur_chain or 0
1528 end
1529 end
1530 end
1531
1532 if not config_use_lazy then
1533 prev_len, prev_dist = cur_len, cur_dist
1534 end
1535 if ((not config_use_lazy or match_available)
1536 and (prev_len > 3 or (prev_len == 3 and prev_dist < 4096))
1537 and cur_len <= prev_len )then
1538 local code = _length_to_deflate_code[prev_len]
1539 local length_extra_bits_bitlen =
1540 _length_to_deflate_extra_bitlen[prev_len]
1541 local dist_code, dist_extra_bits_bitlen, dist_extra_bits
1542 if prev_dist <= 256 then -- have cached code for small distance.
1543 dist_code = _dist256_to_deflate_code[prev_dist]
1544 dist_extra_bits = _dist256_to_deflate_extra_bits[prev_dist]
1545 dist_extra_bits_bitlen =
1546 _dist256_to_deflate_extra_bitlen[prev_dist]
1547 else
1548 dist_code = 16
1549 dist_extra_bits_bitlen = 7
1550 local a = 384
1551 local b = 512
1552
1553 while true do
1554 if prev_dist <= a then
1555 dist_extra_bits = (prev_dist-(b/2)-1) % (b/4)
1556 break
1557 elseif prev_dist <= b then
1558 dist_extra_bits = (prev_dist-(b/2)-1) % (b/4)
1559 dist_code = dist_code + 1
1560 break
1561 else
1562 dist_code = dist_code + 2
1563 dist_extra_bits_bitlen = dist_extra_bits_bitlen + 1
1564 a = a*2
1565 b = b*2
1566 end
1567 end
1568 end
1569 lcode_tblsize = lcode_tblsize + 1
1570 lcodes[lcode_tblsize] = code
1571 lcodes_counts[code] = (lcodes_counts[code] or 0) + 1
1572
1573 dcodes_tblsize = dcodes_tblsize + 1
1574 dcodes[dcodes_tblsize] = dist_code
1575 dcodes_counts[dist_code] = (dcodes_counts[dist_code] or 0) + 1
1576
1577 if length_extra_bits_bitlen > 0 then
1578 local lenExtraBits = _length_to_deflate_extra_bits[prev_len]
1579 lextra_bits_tblsize = lextra_bits_tblsize + 1
1580 lextra_bits[lextra_bits_tblsize] = lenExtraBits
1581 end
1582 if dist_extra_bits_bitlen > 0 then
1583 dextra_bits_tblsize = dextra_bits_tblsize + 1
1584 dextra_bits[dextra_bits_tblsize] = dist_extra_bits
1585 end
1586
1587 for i=index+1, index+prev_len-(config_use_lazy and 2 or 1) do
1588 hash = (hash*256+(string_table[i-offset+2] or 0))%16777216
1589 if prev_len <= config_max_insert_length then
1590 hash_chain = hash_tables[hash]
1591 if not hash_chain then
1592 hash_chain = {}
1593 hash_tables[hash] = hash_chain
1594 end
1595 hash_chain[#hash_chain+1] = i
1596 end
1597 end
1598 index = index + prev_len - (config_use_lazy and 1 or 0)
1599 match_available = false
1600 elseif (not config_use_lazy) or match_available then
1601 local code = string_table[config_use_lazy
1602 and (string_table_index-1) or string_table_index]
1603 lcode_tblsize = lcode_tblsize + 1
1604 lcodes[lcode_tblsize] = code
1605 lcodes_counts[code] = (lcodes_counts[code] or 0) + 1
1606 index = index + 1
1607 else
1608 match_available = true
1609 index = index + 1
1610 end
1611 end
1612
1613 -- Write "end of block" symbol
1614 lcode_tblsize = lcode_tblsize + 1
1615 lcodes[lcode_tblsize] = 256
1616 lcodes_counts[256] = (lcodes_counts[256] or 0) + 1
1617
1618 return lcodes, lextra_bits, lcodes_counts, dcodes, dextra_bits
1619 , dcodes_counts
1620end
1621
1622-- Get the header data of dynamic block.
1623-- @param lcodes_count The count of each literal/LZ77_length codes.
1624-- @param dcodes_count The count of each Lz77 distance codes.
1625-- @return a lots of stuffs.
1626-- @see RFC1951 Page 12
1627local function GetBlockDynamicHuffmanHeader(lcodes_counts, dcodes_counts)
1628 local lcodes_huffman_bitlens, lcodes_huffman_codes
1629 , max_non_zero_bitlen_lcode =
1630 GetHuffmanBitlenAndCode(lcodes_counts, 15, 285)
1631 local dcodes_huffman_bitlens, dcodes_huffman_codes
1632 , max_non_zero_bitlen_dcode =
1633 GetHuffmanBitlenAndCode(dcodes_counts, 15, 29)
1634
1635 local rle_deflate_codes, rle_extra_bits, rle_codes_counts =
1636 RunLengthEncodeHuffmanBitlen(lcodes_huffman_bitlens
1637 ,max_non_zero_bitlen_lcode, dcodes_huffman_bitlens
1638 , max_non_zero_bitlen_dcode)
1639
1640 local rle_codes_huffman_bitlens, rle_codes_huffman_codes =
1641 GetHuffmanBitlenAndCode(rle_codes_counts, 7, 18)
1642
1643 local HCLEN = 0
1644 for i = 1, 19 do
1645 local symbol = _rle_codes_huffman_bitlen_order[i]
1646 local length = rle_codes_huffman_bitlens[symbol] or 0
1647 if length ~= 0 then
1648 HCLEN = i
1649 end
1650 end
1651
1652 HCLEN = HCLEN - 4
1653 local HLIT = max_non_zero_bitlen_lcode + 1 - 257
1654 local HDIST = max_non_zero_bitlen_dcode + 1 - 1
1655 if HDIST < 0 then HDIST = 0 end
1656
1657 return HLIT, HDIST, HCLEN, rle_codes_huffman_bitlens
1658 , rle_codes_huffman_codes, rle_deflate_codes, rle_extra_bits
1659 , lcodes_huffman_bitlens, lcodes_huffman_codes
1660 , dcodes_huffman_bitlens, dcodes_huffman_codes
1661end
1662
1663-- Get the size of dynamic block without writing any bits into the writer.
1664-- @param ... Read the source code of GetBlockDynamicHuffmanHeader()
1665-- @return the bit length of the dynamic block
1666local function GetDynamicHuffmanBlockSize(lcodes, dcodes, HCLEN
1667 , rle_codes_huffman_bitlens, rle_deflate_codes
1668 , lcodes_huffman_bitlens, dcodes_huffman_bitlens)
1669
1670 local block_bitlen = 17 -- 1+2+5+5+4
1671 block_bitlen = block_bitlen + (HCLEN+4)*3
1672
1673 for i = 1, #rle_deflate_codes do
1674 local code = rle_deflate_codes[i]
1675 block_bitlen = block_bitlen + rle_codes_huffman_bitlens[code]
1676 if code >= 16 then
1677 block_bitlen = block_bitlen +
1678 ((code == 16) and 2 or (code == 17 and 3 or 7))
1679 end
1680 end
1681
1682 local length_code_count = 0
1683 for i = 1, #lcodes do
1684 local code = lcodes[i]
1685 local huffman_bitlen = lcodes_huffman_bitlens[code]
1686 block_bitlen = block_bitlen + huffman_bitlen
1687 if code > 256 then -- Length code
1688 length_code_count = length_code_count + 1
1689 if code > 264 and code < 285 then -- Length code with extra bits
1690 local extra_bits_bitlen =
1691 _literal_deflate_code_to_extra_bitlen[code-256]
1692 block_bitlen = block_bitlen + extra_bits_bitlen
1693 end
1694 local dist_code = dcodes[length_code_count]
1695 local dist_huffman_bitlen = dcodes_huffman_bitlens[dist_code]
1696 block_bitlen = block_bitlen + dist_huffman_bitlen
1697
1698 if dist_code > 3 then -- dist code with extra bits
1699 local dist_extra_bits_bitlen = (dist_code-dist_code%2)/2 - 1
1700 block_bitlen = block_bitlen + dist_extra_bits_bitlen
1701 end
1702 end
1703 end
1704 return block_bitlen
1705end
1706
1707-- Write dynamic block.
1708-- @param ... Read the source code of GetBlockDynamicHuffmanHeader()
1709local function CompressDynamicHuffmanBlock(WriteBits, is_last_block
1710 , lcodes, lextra_bits, dcodes, dextra_bits, HLIT, HDIST, HCLEN
1711 , rle_codes_huffman_bitlens, rle_codes_huffman_codes
1712 , rle_deflate_codes, rle_extra_bits
1713 , lcodes_huffman_bitlens, lcodes_huffman_codes
1714 , dcodes_huffman_bitlens, dcodes_huffman_codes)
1715
1716 WriteBits(is_last_block and 1 or 0, 1) -- Last block identifier
1717 WriteBits(2, 2) -- Dynamic Huffman block identifier
1718
1719 WriteBits(HLIT, 5)
1720 WriteBits(HDIST, 5)
1721 WriteBits(HCLEN, 4)
1722
1723 for i = 1, HCLEN+4 do
1724 local symbol = _rle_codes_huffman_bitlen_order[i]
1725 local length = rle_codes_huffman_bitlens[symbol] or 0
1726 WriteBits(length, 3)
1727 end
1728
1729 local rleExtraBitsIndex = 1
1730 for i=1, #rle_deflate_codes do
1731 local code = rle_deflate_codes[i]
1732 WriteBits(rle_codes_huffman_codes[code]
1733 , rle_codes_huffman_bitlens[code])
1734 if code >= 16 then
1735 local extraBits = rle_extra_bits[rleExtraBitsIndex]
1736 WriteBits(extraBits, (code == 16) and 2 or (code == 17 and 3 or 7))
1737 rleExtraBitsIndex = rleExtraBitsIndex + 1
1738 end
1739 end
1740
1741 local length_code_count = 0
1742 local length_code_with_extra_count = 0
1743 local dist_code_with_extra_count = 0
1744
1745 for i=1, #lcodes do
1746 local deflate_codee = lcodes[i]
1747 local huffman_code = lcodes_huffman_codes[deflate_codee]
1748 local huffman_bitlen = lcodes_huffman_bitlens[deflate_codee]
1749 WriteBits(huffman_code, huffman_bitlen)
1750 if deflate_codee > 256 then -- Length code
1751 length_code_count = length_code_count + 1
1752 if deflate_codee > 264 and deflate_codee < 285 then
1753 -- Length code with extra bits
1754 length_code_with_extra_count = length_code_with_extra_count + 1
1755 local extra_bits = lextra_bits[length_code_with_extra_count]
1756 local extra_bits_bitlen =
1757 _literal_deflate_code_to_extra_bitlen[deflate_codee-256]
1758 WriteBits(extra_bits, extra_bits_bitlen)
1759 end
1760 -- Write distance code
1761 local dist_deflate_code = dcodes[length_code_count]
1762 local dist_huffman_code = dcodes_huffman_codes[dist_deflate_code]
1763 local dist_huffman_bitlen =
1764 dcodes_huffman_bitlens[dist_deflate_code]
1765 WriteBits(dist_huffman_code, dist_huffman_bitlen)
1766
1767 if dist_deflate_code > 3 then -- dist code with extra bits
1768 dist_code_with_extra_count = dist_code_with_extra_count + 1
1769 local dist_extra_bits = dextra_bits[dist_code_with_extra_count]
1770 local dist_extra_bits_bitlen =
1771 (dist_deflate_code-dist_deflate_code%2)/2 - 1
1772 WriteBits(dist_extra_bits, dist_extra_bits_bitlen)
1773 end
1774 end
1775 end
1776end
1777
1778-- Get the size of fixed block without writing any bits into the writer.
1779-- @param lcodes literal/LZ77_length deflate codes
1780-- @param decodes LZ77 distance deflate codes
1781-- @return the bit length of the fixed block
1782local function GetFixedHuffmanBlockSize(lcodes, dcodes)
1783 local block_bitlen = 3
1784 local length_code_count = 0
1785 for i=1, #lcodes do
1786 local code = lcodes[i]
1787 local huffman_bitlen = _fix_block_literal_huffman_bitlen[code]
1788 block_bitlen = block_bitlen + huffman_bitlen
1789 if code > 256 then -- Length code
1790 length_code_count = length_code_count + 1
1791 if code > 264 and code < 285 then -- Length code with extra bits
1792 local extra_bits_bitlen =
1793 _literal_deflate_code_to_extra_bitlen[code-256]
1794 block_bitlen = block_bitlen + extra_bits_bitlen
1795 end
1796 local dist_code = dcodes[length_code_count]
1797 block_bitlen = block_bitlen + 5
1798
1799 if dist_code > 3 then -- dist code with extra bits
1800 local dist_extra_bits_bitlen =
1801 (dist_code-dist_code%2)/2 - 1
1802 block_bitlen = block_bitlen + dist_extra_bits_bitlen
1803 end
1804 end
1805 end
1806 return block_bitlen
1807end
1808
1809-- Write fixed block.
1810-- @param lcodes literal/LZ77_length deflate codes
1811-- @param decodes LZ77 distance deflate codes
1812local function CompressFixedHuffmanBlock(WriteBits, is_last_block,
1813 lcodes, lextra_bits, dcodes, dextra_bits)
1814 WriteBits(is_last_block and 1 or 0, 1) -- Last block identifier
1815 WriteBits(1, 2) -- Fixed Huffman block identifier
1816 local length_code_count = 0
1817 local length_code_with_extra_count = 0
1818 local dist_code_with_extra_count = 0
1819 for i=1, #lcodes do
1820 local deflate_code = lcodes[i]
1821 local huffman_code = _fix_block_literal_huffman_code[deflate_code]
1822 local huffman_bitlen = _fix_block_literal_huffman_bitlen[deflate_code]
1823 WriteBits(huffman_code, huffman_bitlen)
1824 if deflate_code > 256 then -- Length code
1825 length_code_count = length_code_count + 1
1826 if deflate_code > 264 and deflate_code < 285 then
1827 -- Length code with extra bits
1828 length_code_with_extra_count = length_code_with_extra_count + 1
1829 local extra_bits = lextra_bits[length_code_with_extra_count]
1830 local extra_bits_bitlen =
1831 _literal_deflate_code_to_extra_bitlen[deflate_code-256]
1832 WriteBits(extra_bits, extra_bits_bitlen)
1833 end
1834 -- Write distance code
1835 local dist_code = dcodes[length_code_count]
1836 local dist_huffman_code = _fix_block_dist_huffman_code[dist_code]
1837 WriteBits(dist_huffman_code, 5)
1838
1839 if dist_code > 3 then -- dist code with extra bits
1840 dist_code_with_extra_count = dist_code_with_extra_count + 1
1841 local dist_extra_bits = dextra_bits[dist_code_with_extra_count]
1842 local dist_extra_bits_bitlen = (dist_code-dist_code%2)/2 - 1
1843 WriteBits(dist_extra_bits, dist_extra_bits_bitlen)
1844 end
1845 end
1846 end
1847end
1848
1849-- Get the size of store block without writing any bits into the writer.
1850-- @param block_start The start index of the origin input string
1851-- @param block_end The end index of the origin input string
1852-- @param Total bit lens had been written into the compressed result before,
1853-- because store block needs to shift to byte boundary.
1854-- @return the bit length of the fixed block
1855local function GetStoreBlockSize(block_start, block_end, total_bitlen)
1856 assert(block_end-block_start+1 <= 65535)
1857 local block_bitlen = 3
1858 total_bitlen = total_bitlen + 3
1859 local padding_bitlen = (8-total_bitlen%8)%8
1860 block_bitlen = block_bitlen + padding_bitlen
1861 block_bitlen = block_bitlen + 32
1862 block_bitlen = block_bitlen + (block_end - block_start + 1) * 8
1863 return block_bitlen
1864end
1865
1866-- Write the store block.
1867-- @param ... lots of stuffs
1868-- @return nil
1869local function CompressStoreBlock(WriteBits, WriteString, is_last_block, str
1870 , block_start, block_end, total_bitlen)
1871 assert(block_end-block_start+1 <= 65535)
1872 WriteBits(is_last_block and 1 or 0, 1) -- Last block identifer.
1873 WriteBits(0, 2) -- Store block identifier.
1874 total_bitlen = total_bitlen + 3
1875 local padding_bitlen = (8-total_bitlen%8)%8
1876 if padding_bitlen > 0 then
1877 WriteBits(_pow2[padding_bitlen]-1, padding_bitlen)
1878 end
1879 local size = block_end - block_start + 1
1880 WriteBits(size, 16)
1881
1882 -- Write size's one's complement
1883 local comp = (255 - size % 256) + (255 - (size-size%256)/256)*256
1884 WriteBits(comp, 16)
1885
1886 WriteString(str:sub(block_start, block_end))
1887end
1888
1889-- Do the deflate
1890-- Currently using a simple way to determine the block size
1891-- (This is why the compression ratio is little bit worse than zlib when
1892-- the input size is very large
1893-- The first block is 64KB, the following block is 32KB.
1894-- After each block, there is a memory cleanup operation.
1895-- This is not a fast operation, but it is needed to save memory usage, so
1896-- the memory usage does not grow unboundly. If the data size is less than
1897-- 64KB, then memory cleanup won't happen.
1898-- This function determines whether to use store/fixed/dynamic blocks by
1899-- calculating the block size of each block type and chooses the smallest one.
1900local function Deflate(configs, WriteBits, WriteString, FlushWriter, str
1901 , dictionary)
1902 local string_table = {}
1903 local hash_tables = {}
1904 local is_last_block = nil
1905 local block_start
1906 local block_end
1907 local bitlen_written
1908 local total_bitlen = FlushWriter(_FLUSH_MODE_NO_FLUSH)
1909 local strlen = #str
1910 local offset
1911
1912 local level
1913 local strategy
1914 if configs then
1915 if configs.level then
1916 level = configs.level
1917 end
1918 if configs.strategy then
1919 strategy = configs.strategy
1920 end
1921 end
1922
1923 if not level then
1924 if strlen < 2048 then
1925 level = 7
1926 elseif strlen > 65536 then
1927 level = 3
1928 else
1929 level = 5
1930 end
1931 end
1932
1933 while not is_last_block do
1934 if not block_start then
1935 block_start = 1
1936 block_end = 64*1024 - 1
1937 offset = 0
1938 else
1939 block_start = block_end + 1
1940 block_end = block_end + 32*1024
1941 offset = block_start - 32*1024 - 1
1942 end
1943
1944 if block_end >= strlen then
1945 block_end = strlen
1946 is_last_block = true
1947 else
1948 is_last_block = false
1949 end
1950
1951 local lcodes, lextra_bits, lcodes_counts, dcodes, dextra_bits
1952 , dcodes_counts
1953
1954 local HLIT, HDIST, HCLEN, rle_codes_huffman_bitlens
1955 , rle_codes_huffman_codes, rle_deflate_codes
1956 , rle_extra_bits, lcodes_huffman_bitlens, lcodes_huffman_codes
1957 , dcodes_huffman_bitlens, dcodes_huffman_codes
1958
1959 local dynamic_block_bitlen
1960 local fixed_block_bitlen
1961 local store_block_bitlen
1962
1963 if level ~= 0 then
1964
1965 -- GetBlockLZ77 needs block_start to block_end+3 to be loaded.
1966 LoadStringToTable(str, string_table, block_start, block_end + 3
1967 , offset)
1968 if block_start == 1 and dictionary then
1969 local dict_string_table = dictionary.string_table
1970 local dict_strlen = dictionary.strlen
1971 for i=0, (-dict_strlen+1)<-257
1972 and -257 or (-dict_strlen+1), -1 do
1973 string_table[i] = dict_string_table[dict_strlen+i]
1974 end
1975 end
1976
1977 if strategy == "huffman_only" then
1978 lcodes = {}
1979 LoadStringToTable(str, lcodes, block_start, block_end
1980 , block_start-1)
1981 lextra_bits = {}
1982 lcodes_counts = {}
1983 lcodes[block_end - block_start+2] = 256 -- end of block
1984 for i=1, block_end - block_start+2 do
1985 local code = lcodes[i]
1986 lcodes_counts[code] = (lcodes_counts[code] or 0) + 1
1987 end
1988 dcodes = {}
1989 dextra_bits = {}
1990 dcodes_counts = {}
1991 else
1992 lcodes, lextra_bits, lcodes_counts, dcodes, dextra_bits
1993 , dcodes_counts = GetBlockLZ77Result(level, string_table
1994 , hash_tables, block_start, block_end, offset, dictionary
1995 )
1996 end
1997
1998 HLIT, HDIST, HCLEN, rle_codes_huffman_bitlens
1999 , rle_codes_huffman_codes, rle_deflate_codes
2000 , rle_extra_bits, lcodes_huffman_bitlens, lcodes_huffman_codes
2001 , dcodes_huffman_bitlens, dcodes_huffman_codes =
2002 GetBlockDynamicHuffmanHeader(lcodes_counts, dcodes_counts)
2003 dynamic_block_bitlen = GetDynamicHuffmanBlockSize(
2004 lcodes, dcodes, HCLEN, rle_codes_huffman_bitlens
2005 , rle_deflate_codes, lcodes_huffman_bitlens
2006 , dcodes_huffman_bitlens)
2007 fixed_block_bitlen = GetFixedHuffmanBlockSize(lcodes, dcodes)
2008 end
2009
2010 store_block_bitlen = GetStoreBlockSize(block_start, block_end
2011 , total_bitlen)
2012
2013 local min_bitlen = store_block_bitlen
2014 min_bitlen = (fixed_block_bitlen and fixed_block_bitlen < min_bitlen)
2015 and fixed_block_bitlen or min_bitlen
2016 min_bitlen = (dynamic_block_bitlen
2017 and dynamic_block_bitlen < min_bitlen)
2018 and dynamic_block_bitlen or min_bitlen
2019
2020 if level == 0 or (strategy ~= "fixed" and strategy ~= "dynamic" and
2021 store_block_bitlen == min_bitlen) then
2022 CompressStoreBlock(WriteBits, WriteString, is_last_block
2023 , str, block_start, block_end, total_bitlen)
2024 total_bitlen = total_bitlen + store_block_bitlen
2025 elseif strategy ~= "dynamic" and (
2026 strategy == "fixed" or fixed_block_bitlen == min_bitlen) then
2027 CompressFixedHuffmanBlock(WriteBits, is_last_block,
2028 lcodes, lextra_bits, dcodes, dextra_bits)
2029 total_bitlen = total_bitlen + fixed_block_bitlen
2030 elseif strategy == "dynamic" or dynamic_block_bitlen == min_bitlen then
2031 CompressDynamicHuffmanBlock(WriteBits, is_last_block, lcodes
2032 , lextra_bits, dcodes, dextra_bits, HLIT, HDIST, HCLEN
2033 , rle_codes_huffman_bitlens, rle_codes_huffman_codes
2034 , rle_deflate_codes, rle_extra_bits
2035 , lcodes_huffman_bitlens, lcodes_huffman_codes
2036 , dcodes_huffman_bitlens, dcodes_huffman_codes)
2037 total_bitlen = total_bitlen + dynamic_block_bitlen
2038 end
2039
2040 if is_last_block then
2041 bitlen_written = FlushWriter(_FLUSH_MODE_NO_FLUSH)
2042 else
2043 bitlen_written = FlushWriter(_FLUSH_MODE_MEMORY_CLEANUP)
2044 end
2045
2046 assert(bitlen_written == total_bitlen)
2047
2048 -- Memory clean up, so memory consumption does not always grow linearly
2049 -- , even if input string is > 64K.
2050 -- Not a very efficient operation, but this operation won't happen
2051 -- when the input data size is less than 64K.
2052 if not is_last_block then
2053 local j
2054 if dictionary and block_start == 1 then
2055 j = 0
2056 while (string_table[j]) do
2057 string_table[j] = nil
2058 j = j - 1
2059 end
2060 end
2061 dictionary = nil
2062 j = 1
2063 for i = block_end-32767, block_end do
2064 string_table[j] = string_table[i-offset]
2065 j = j + 1
2066 end
2067
2068 for k, t in pairs(hash_tables) do
2069 local tSize = #t
2070 if tSize > 0 and block_end+1 - t[1] > 32768 then
2071 if tSize == 1 then
2072 hash_tables[k] = nil
2073 else
2074 local new = {}
2075 local newSize = 0
2076 for i = 2, tSize do
2077 j = t[i]
2078 if block_end+1 - j <= 32768 then
2079 newSize = newSize + 1
2080 new[newSize] = j
2081 end
2082 end
2083 hash_tables[k] = new
2084 end
2085 end
2086 end
2087 end
2088 end
2089end
2090
2091--- The description to compression configuration table. <br>
2092-- Any field can be nil to use its default. <br>
2093-- Table with keys other than those below is an invalid table.
2094-- @class table
2095-- @name compression_configs
2096-- @field level The compression level ranged from 0 to 9. 0 is no compression.
2097-- 9 is the slowest but best compression. Use nil for default level.
2098-- @field strategy The compression strategy. "fixed" to only use fixed deflate
2099-- compression block. "dynamic" to only use dynamic block. "huffman_only" to
2100-- do no LZ77 compression. Only do huffman compression.
2101
2102
2103-- @see LibDeflate:CompressDeflate(str, configs)
2104-- @see LibDeflate:CompressDeflateWithDict(str, dictionary, configs)
2105local function CompressDeflateInternal(str, dictionary, configs)
2106 local WriteBits, WriteString, FlushWriter = CreateWriter()
2107 Deflate(configs, WriteBits, WriteString, FlushWriter, str, dictionary)
2108 local total_bitlen, result = FlushWriter(_FLUSH_MODE_OUTPUT)
2109 local padding_bitlen = (8-total_bitlen%8)%8
2110 return result, padding_bitlen
2111end
2112
2113-- @see LibDeflate:CompressZlib
2114-- @see LibDeflate:CompressZlibWithDict
2115local function CompressZlibInternal(str, dictionary, configs)
2116 local WriteBits, WriteString, FlushWriter = CreateWriter()
2117
2118 local CM = 8 -- Compression method
2119 local CINFO = 7 --Window Size = 32K
2120 local CMF = CINFO*16+CM
2121 WriteBits(CMF, 8)
2122
2123 local FDIST = dictionary and 1 or 0
2124 local FLEVEL = 2 -- Default compression
2125 local FLG = FLEVEL*64+FDIST*32
2126 local FCHECK = (31-(CMF*256+FLG)%31)
2127 -- The FCHECK value must be such that CMF and FLG,
2128 -- when viewed as a 16-bit unsigned integer stored
2129 -- in MSB order (CMF*256 + FLG), is a multiple of 31.
2130 FLG = FLG + FCHECK
2131 WriteBits(FLG, 8)
2132
2133 if FDIST == 1 then
2134 local adler32 = dictionary.adler32
2135 local byte0 = adler32 % 256
2136 adler32 = (adler32 - byte0) / 256
2137 local byte1 = adler32 % 256
2138 adler32 = (adler32 - byte1) / 256
2139 local byte2 = adler32 % 256
2140 adler32 = (adler32 - byte2) / 256
2141 local byte3 = adler32 % 256
2142 WriteBits(byte3, 8)
2143 WriteBits(byte2, 8)
2144 WriteBits(byte1, 8)
2145 WriteBits(byte0, 8)
2146 end
2147
2148 Deflate(configs, WriteBits, WriteString, FlushWriter, str, dictionary)
2149 FlushWriter(_FLUSH_MODE_BYTE_BOUNDARY)
2150
2151 local adler32 = LibDeflate:Adler32(str)
2152
2153 -- Most significant byte first
2154 local byte3 = adler32%256
2155 adler32 = (adler32 - byte3) / 256
2156 local byte2 = adler32%256
2157 adler32 = (adler32 - byte2) / 256
2158 local byte1 = adler32%256
2159 adler32 = (adler32 - byte1) / 256
2160 local byte0 = adler32%256
2161
2162 WriteBits(byte0, 8)
2163 WriteBits(byte1, 8)
2164 WriteBits(byte2, 8)
2165 WriteBits(byte3, 8)
2166 local total_bitlen, result = FlushWriter(_FLUSH_MODE_OUTPUT)
2167 local padding_bitlen = (8-total_bitlen%8)%8
2168 return result, padding_bitlen
2169end
2170
2171--- Compress using the raw deflate format.
2172-- @param str [string] The data to be compressed.
2173-- @param configs [table/nil] The configuration table to control the compression
2174-- . If nil, use the default configuration.
2175-- @return [string] The compressed data.
2176-- @return [integer] The number of bits padded at the end of output.
2177-- 0 <= bits < 8 <br>
2178-- This means the most significant "bits" of the last byte of the returned
2179-- compressed data are padding bits and they don't affect decompression.
2180-- You don't need to use this value unless you want to do some postprocessing
2181-- to the compressed data.
2182-- @see compression_configs
2183-- @see LibDeflate:DecompressDeflate
2184function LibDeflate:CompressDeflate(str, configs)
2185 local arg_valid, arg_err = IsValidArguments(str, false, nil, true, configs)
2186 if not arg_valid then
2187 error(("Usage: LibDeflate:CompressDeflate(str, configs): "
2188 ..arg_err), 2)
2189 end
2190 return CompressDeflateInternal(str, nil, configs)
2191end
2192
2193--- Compress using the raw deflate format with a preset dictionary.
2194-- @param str [string] The data to be compressed.
2195-- @param dictionary [table] The preset dictionary produced by
2196-- LibDeflate:CreateDictionary
2197-- @param configs [table/nil] The configuration table to control the compression
2198-- . If nil, use the default configuration.
2199-- @return [string] The compressed data.
2200-- @return [integer] The number of bits padded at the end of output.
2201-- 0 <= bits < 8 <br>
2202-- This means the most significant "bits" of the last byte of the returned
2203-- compressed data are padding bits and they don't affect decompression.
2204-- You don't need to use this value unless you want to do some postprocessing
2205-- to the compressed data.
2206-- @see compression_configs
2207-- @see LibDeflate:CreateDictionary
2208-- @see LibDeflate:DecompressDeflateWithDict
2209function LibDeflate:CompressDeflateWithDict(str, dictionary, configs)
2210 local arg_valid, arg_err = IsValidArguments(str, true, dictionary
2211 , true, configs)
2212 if not arg_valid then
2213 error(("Usage: LibDeflate:CompressDeflateWithDict"
2214 .."(str, dictionary, configs): "
2215 ..arg_err), 2)
2216 end
2217 return CompressDeflateInternal(str, dictionary, configs)
2218end
2219
2220--- Compress using the zlib format.
2221-- @param str [string] the data to be compressed.
2222-- @param configs [table/nil] The configuration table to control the compression
2223-- . If nil, use the default configuration.
2224-- @return [string] The compressed data.
2225-- @return [integer] The number of bits padded at the end of output.
2226-- Should always be 0.
2227-- Zlib formatted compressed data never has padding bits at the end.
2228-- @see compression_configs
2229-- @see LibDeflate:DecompressZlib
2230function LibDeflate:CompressZlib(str, configs)
2231 local arg_valid, arg_err = IsValidArguments(str, false, nil, true, configs)
2232 if not arg_valid then
2233 error(("Usage: LibDeflate:CompressZlib(str, configs): "
2234 ..arg_err), 2)
2235 end
2236 return CompressZlibInternal(str, nil, configs)
2237end
2238
2239--- Compress using the zlib format with a preset dictionary.
2240-- @param str [string] the data to be compressed.
2241-- @param dictionary [table] A preset dictionary produced
2242-- by LibDeflate:CreateDictionary()
2243-- @param configs [table/nil] The configuration table to control the compression
2244-- . If nil, use the default configuration.
2245-- @return [string] The compressed data.
2246-- @return [integer] The number of bits padded at the end of output.
2247-- Should always be 0.
2248-- Zlib formatted compressed data never has padding bits at the end.
2249-- @see compression_configs
2250-- @see LibDeflate:CreateDictionary
2251-- @see LibDeflate:DecompressZlibWithDict
2252function LibDeflate:CompressZlibWithDict(str, dictionary, configs)
2253 local arg_valid, arg_err = IsValidArguments(str, true, dictionary
2254 , true, configs)
2255 if not arg_valid then
2256 error(("Usage: LibDeflate:CompressZlibWithDict"
2257 .."(str, dictionary, configs): "
2258 ..arg_err), 2)
2259 end
2260 return CompressZlibInternal(str, dictionary, configs)
2261end
2262
2263--[[ --------------------------------------------------------------------------
2264 Decompress code
2265--]] --------------------------------------------------------------------------
2266
2267--[[
2268 Create a reader to easily reader stuffs as the unit of bits.
2269 Return values:
2270 1. ReadBits(bitlen)
2271 2. ReadBytes(bytelen, buffer, buffer_size)
2272 3. Decode(huffman_bitlen_count, huffman_symbol, min_bitlen)
2273 4. ReaderBitlenLeft()
2274 5. SkipToByteBoundary()
2275--]]
2276local function CreateReader(input_string)
2277 local input = input_string
2278 local input_strlen = #input_string
2279 local input_next_byte_pos = 1
2280 local cache_bitlen = 0
2281 local cache = 0
2282
2283 -- Read some bits.
2284 -- To improve speed, this function does not
2285 -- check if the input has been exhausted.
2286 -- Use ReaderBitlenLeft() < 0 to check it.
2287 -- @param bitlen the number of bits to read
2288 -- @return the data is read.
2289 local function ReadBits(bitlen)
2290 local rshift_mask = _pow2[bitlen]
2291 local code
2292 if bitlen <= cache_bitlen then
2293 code = cache % rshift_mask
2294 cache = (cache - code) / rshift_mask
2295 cache_bitlen = cache_bitlen - bitlen
2296 else -- Whether input has been exhausted is not checked.
2297 local lshift_mask = _pow2[cache_bitlen]
2298 local byte1, byte2, byte3, byte4 = string_byte(input
2299 , input_next_byte_pos, input_next_byte_pos+3)
2300 -- This requires lua number to be at least double ()
2301 cache = cache + ((byte1 or 0)+(byte2 or 0)*256
2302 + (byte3 or 0)*65536+(byte4 or 0)*16777216)*lshift_mask
2303 input_next_byte_pos = input_next_byte_pos + 4
2304 cache_bitlen = cache_bitlen + 32 - bitlen
2305 code = cache % rshift_mask
2306 cache = (cache - code) / rshift_mask
2307 end
2308 return code
2309 end
2310
2311 -- Read some bytes from the reader.
2312 -- Assume reader is on the byte boundary.
2313 -- @param bytelen The number of bytes to be read.
2314 -- @param buffer The byte read will be stored into this buffer.
2315 -- @param buffer_size The buffer will be modified starting from
2316 -- buffer[buffer_size+1], ending at buffer[buffer_size+bytelen-1]
2317 -- @return the new buffer_size
2318 local function ReadBytes(bytelen, buffer, buffer_size)
2319 assert(cache_bitlen % 8 == 0)
2320
2321 local byte_from_cache = (cache_bitlen/8 < bytelen)
2322 and (cache_bitlen/8) or bytelen
2323 for _=1, byte_from_cache do
2324 local byte = cache % 256
2325 buffer_size = buffer_size + 1
2326 buffer[buffer_size] = string_char(byte)
2327 cache = (cache - byte) / 256
2328 end
2329 cache_bitlen = cache_bitlen - byte_from_cache*8
2330 bytelen = bytelen - byte_from_cache
2331 if (input_strlen - input_next_byte_pos - bytelen + 1) * 8
2332 + cache_bitlen < 0 then
2333 return -1 -- out of input
2334 end
2335 for i=input_next_byte_pos, input_next_byte_pos+bytelen-1 do
2336 buffer_size = buffer_size + 1
2337 buffer[buffer_size] = string_sub(input, i, i)
2338 end
2339
2340 input_next_byte_pos = input_next_byte_pos + bytelen
2341 return buffer_size
2342 end
2343
2344 -- Decode huffman code
2345 -- To improve speed, this function does not check
2346 -- if the input has been exhausted.
2347 -- Use ReaderBitlenLeft() < 0 to check it.
2348 -- Credits for Mark Adler. This code is from puff:Decode()
2349 -- @see puff:Decode(...)
2350 -- @param huffman_bitlen_count
2351 -- @param huffman_symbol
2352 -- @param min_bitlen The minimum huffman bit length of all symbols
2353 -- @return The decoded deflate code.
2354 -- Negative value is returned if decoding fails.
2355 local function Decode(huffman_bitlen_counts, huffman_symbols, min_bitlen)
2356 local code = 0
2357 local first = 0
2358 local index = 0
2359 local count
2360 if min_bitlen > 0 then
2361 if cache_bitlen < 15 and input then
2362 local lshift_mask = _pow2[cache_bitlen]
2363 local byte1, byte2, byte3, byte4 =
2364 string_byte(input, input_next_byte_pos
2365 , input_next_byte_pos+3)
2366 -- This requires lua number to be at least double ()
2367 cache = cache + ((byte1 or 0)+(byte2 or 0)*256
2368 +(byte3 or 0)*65536+(byte4 or 0)*16777216)*lshift_mask
2369 input_next_byte_pos = input_next_byte_pos + 4
2370 cache_bitlen = cache_bitlen + 32
2371 end
2372
2373 local rshift_mask = _pow2[min_bitlen]
2374 cache_bitlen = cache_bitlen - min_bitlen
2375 code = cache % rshift_mask
2376 cache = (cache - code) / rshift_mask
2377 -- Reverse the bits
2378 code = _reverse_bits_tbl[min_bitlen][code]
2379
2380 count = huffman_bitlen_counts[min_bitlen]
2381 if code < count then
2382 return huffman_symbols[code]
2383 end
2384 index = count
2385 first = count * 2
2386 code = code * 2
2387 end
2388
2389 for bitlen = min_bitlen+1, 15 do
2390 local bit
2391 bit = cache % 2
2392 cache = (cache - bit) / 2
2393 cache_bitlen = cache_bitlen - 1
2394
2395 code = (bit==1) and (code + 1 - code % 2) or code
2396 count = huffman_bitlen_counts[bitlen] or 0
2397 local diff = code - first
2398 if diff < count then
2399 return huffman_symbols[index + diff]
2400 end
2401 index = index + count
2402 first = first + count
2403 first = first * 2
2404 code = code * 2
2405 end
2406 -- invalid literal/length or distance code
2407 -- in fixed or dynamic block (run out of code)
2408 return -10
2409 end
2410
2411 local function ReaderBitlenLeft()
2412 return (input_strlen - input_next_byte_pos + 1) * 8 + cache_bitlen
2413 end
2414
2415 local function SkipToByteBoundary()
2416 local skipped_bitlen = cache_bitlen%8
2417 local rshift_mask = _pow2[skipped_bitlen]
2418 cache_bitlen = cache_bitlen - skipped_bitlen
2419 cache = (cache - cache % rshift_mask) / rshift_mask
2420 end
2421
2422 return ReadBits, ReadBytes, Decode, ReaderBitlenLeft, SkipToByteBoundary
2423end
2424
2425-- Create a deflate state, so I can pass in less arguments to functions.
2426-- @param str the whole string to be decompressed.
2427-- @param dictionary The preset dictionary. nil if not provided.
2428-- This dictionary should be produced by LibDeflate:CreateDictionary(str)
2429-- @return The decomrpess state.
2430local function CreateDecompressState(str, dictionary)
2431 local ReadBits, ReadBytes, Decode, ReaderBitlenLeft
2432 , SkipToByteBoundary = CreateReader(str)
2433 local state =
2434 {
2435 ReadBits = ReadBits,
2436 ReadBytes = ReadBytes,
2437 Decode = Decode,
2438 ReaderBitlenLeft = ReaderBitlenLeft,
2439 SkipToByteBoundary = SkipToByteBoundary,
2440 buffer_size = 0,
2441 buffer = {},
2442 result_buffer = {},
2443 dictionary = dictionary,
2444 }
2445 return state
2446end
2447
2448-- Get the stuffs needed to decode huffman codes
2449-- @see puff.c:construct(...)
2450-- @param huffman_bitlen The huffman bit length of the huffman codes.
2451-- @param max_symbol The maximum symbol
2452-- @param max_bitlen The min huffman bit length of all codes
2453-- @return zero or positive for success, negative for failure.
2454-- @return The count of each huffman bit length.
2455-- @return A table to convert huffman codes to deflate codes.
2456-- @return The minimum huffman bit length.
2457local function GetHuffmanForDecode(huffman_bitlens, max_symbol, max_bitlen)
2458 local huffman_bitlen_counts = {}
2459 local min_bitlen = max_bitlen
2460 for symbol = 0, max_symbol do
2461 local bitlen = huffman_bitlens[symbol] or 0
2462 min_bitlen = (bitlen > 0 and bitlen < min_bitlen)
2463 and bitlen or min_bitlen
2464 huffman_bitlen_counts[bitlen] = (huffman_bitlen_counts[bitlen] or 0)+1
2465 end
2466
2467 if huffman_bitlen_counts[0] == max_symbol+1 then -- No Codes
2468 return 0, huffman_bitlen_counts, {}, 0 -- Complete, but decode will fail
2469 end
2470
2471 local left = 1
2472 for len = 1, max_bitlen do
2473 left = left * 2
2474 left = left - (huffman_bitlen_counts[len] or 0)
2475 if left < 0 then
2476 return left -- Over-subscribed, return negative
2477 end
2478 end
2479
2480 -- Generate offsets info symbol table for each length for sorting
2481 local offsets = {}
2482 offsets[1] = 0
2483 for len = 1, max_bitlen-1 do
2484 offsets[len + 1] = offsets[len] + (huffman_bitlen_counts[len] or 0)
2485 end
2486
2487 local huffman_symbols = {}
2488 for symbol = 0, max_symbol do
2489 local bitlen = huffman_bitlens[symbol] or 0
2490 if bitlen ~= 0 then
2491 local offset = offsets[bitlen]
2492 huffman_symbols[offset] = symbol
2493 offsets[bitlen] = offsets[bitlen] + 1
2494 end
2495 end
2496
2497 -- Return zero for complete set, positive for incomplete set.
2498 return left, huffman_bitlen_counts, huffman_symbols, min_bitlen
2499end
2500
2501-- Decode a fixed or dynamic huffman blocks, excluding last block identifier
2502-- and block type identifer.
2503-- @see puff.c:codes()
2504-- @param state decompression state that will be modified by this function.
2505-- @see CreateDecompressState
2506-- @param ... Read the source code
2507-- @return 0 on success, other value on failure.
2508local function DecodeUntilEndOfBlock(state, lcodes_huffman_bitlens
2509 , lcodes_huffman_symbols, lcodes_huffman_min_bitlen
2510 , dcodes_huffman_bitlens, dcodes_huffman_symbols
2511 , dcodes_huffman_min_bitlen)
2512 local buffer, buffer_size, ReadBits, Decode, ReaderBitlenLeft
2513 , result_buffer =
2514 state.buffer, state.buffer_size, state.ReadBits, state.Decode
2515 , state.ReaderBitlenLeft, state.result_buffer
2516 local dictionary = state.dictionary
2517 local dict_string_table
2518 local dict_strlen
2519
2520 local buffer_end = 1
2521 if dictionary and not buffer[0] then
2522 -- If there is a dictionary, copy the last 258 bytes into
2523 -- the string_table to make the copy in the main loop quicker.
2524 -- This is done only once per decompression.
2525 dict_string_table = dictionary.string_table
2526 dict_strlen = dictionary.strlen
2527 buffer_end = -dict_strlen + 1
2528 for i=0, (-dict_strlen+1)<-257 and -257 or (-dict_strlen+1), -1 do
2529 buffer[i] = _byte_to_char[dict_string_table[dict_strlen+i]]
2530 end
2531 end
2532
2533 repeat
2534 local symbol = Decode(lcodes_huffman_bitlens
2535 , lcodes_huffman_symbols, lcodes_huffman_min_bitlen)
2536 if symbol < 0 or symbol > 285 then
2537 -- invalid literal/length or distance code in fixed or dynamic block
2538 return -10
2539 elseif symbol < 256 then -- Literal
2540 buffer_size = buffer_size + 1
2541 buffer[buffer_size] = _byte_to_char[symbol]
2542 elseif symbol > 256 then -- Length code
2543 symbol = symbol - 256
2544 local bitlen = _literal_deflate_code_to_base_len[symbol]
2545 bitlen = (symbol >= 8)
2546 and (bitlen
2547 + ReadBits(_literal_deflate_code_to_extra_bitlen[symbol]))
2548 or bitlen
2549 symbol = Decode(dcodes_huffman_bitlens, dcodes_huffman_symbols
2550 , dcodes_huffman_min_bitlen)
2551 if symbol < 0 or symbol > 29 then
2552 -- invalid literal/length or distance code in fixed or dynamic block
2553 return -10
2554 end
2555 local dist = _dist_deflate_code_to_base_dist[symbol]
2556 dist = (dist > 4) and (dist
2557 + ReadBits(_dist_deflate_code_to_extra_bitlen[symbol])) or dist
2558
2559 local char_buffer_index = buffer_size-dist+1
2560 if char_buffer_index < buffer_end then
2561 -- distance is too far back in fixed or dynamic block
2562 return -11
2563 end
2564 if char_buffer_index >= -257 then
2565 for _=1, bitlen do
2566 buffer_size = buffer_size + 1
2567 buffer[buffer_size] = buffer[char_buffer_index]
2568 char_buffer_index = char_buffer_index + 1
2569 end
2570 else
2571 char_buffer_index = dict_strlen + char_buffer_index
2572 for _=1, bitlen do
2573 buffer_size = buffer_size + 1
2574 buffer[buffer_size] =
2575 _byte_to_char[dict_string_table[char_buffer_index]]
2576 char_buffer_index = char_buffer_index + 1
2577 end
2578 end
2579 end
2580
2581 if ReaderBitlenLeft() < 0 then
2582 return 2 -- available inflate data did not terminate
2583 end
2584
2585 if buffer_size >= 65536 then
2586 result_buffer[#result_buffer+1] =
2587 table_concat(buffer, "", 1, 32768)
2588 for i=32769, buffer_size do
2589 buffer[i-32768] = buffer[i]
2590 end
2591 buffer_size = buffer_size - 32768
2592 buffer[buffer_size+1] = nil
2593 -- NOTE: buffer[32769..end] and buffer[-257..0] are not cleared.
2594 -- This is why "buffer_size" variable is needed.
2595 end
2596 until symbol == 256
2597
2598 state.buffer_size = buffer_size
2599
2600 return 0
2601end
2602
2603-- Decompress a store block
2604-- @param state decompression state that will be modified by this function.
2605-- @return 0 if succeeds, other value if fails.
2606local function DecompressStoreBlock(state)
2607 local buffer, buffer_size, ReadBits, ReadBytes, ReaderBitlenLeft
2608 , SkipToByteBoundary, result_buffer =
2609 state.buffer, state.buffer_size, state.ReadBits, state.ReadBytes
2610 , state.ReaderBitlenLeft, state.SkipToByteBoundary, state.result_buffer
2611
2612 SkipToByteBoundary()
2613 local bytelen = ReadBits(16)
2614 if ReaderBitlenLeft() < 0 then
2615 return 2 -- available inflate data did not terminate
2616 end
2617 local bytelenComp = ReadBits(16)
2618 if ReaderBitlenLeft() < 0 then
2619 return 2 -- available inflate data did not terminate
2620 end
2621
2622 if bytelen % 256 + bytelenComp % 256 ~= 255 then
2623 return -2 -- Not one's complement
2624 end
2625 if (bytelen-bytelen % 256)/256
2626 + (bytelenComp-bytelenComp % 256)/256 ~= 255 then
2627 return -2 -- Not one's complement
2628 end
2629
2630 -- Note that ReadBytes will skip to the next byte boundary first.
2631 buffer_size = ReadBytes(bytelen, buffer, buffer_size)
2632 if buffer_size < 0 then
2633 return 2 -- available inflate data did not terminate
2634 end
2635
2636 -- memory clean up when there are enough bytes in the buffer.
2637 if buffer_size >= 65536 then
2638 result_buffer[#result_buffer+1] = table_concat(buffer, "", 1, 32768)
2639 for i=32769, buffer_size do
2640 buffer[i-32768] = buffer[i]
2641 end
2642 buffer_size = buffer_size - 32768
2643 buffer[buffer_size+1] = nil
2644 end
2645 state.buffer_size = buffer_size
2646 return 0
2647end
2648
2649-- Decompress a fixed block
2650-- @param state decompression state that will be modified by this function.
2651-- @return 0 if succeeds other value if fails.
2652local function DecompressFixBlock(state)
2653 return DecodeUntilEndOfBlock(state
2654 , _fix_block_literal_huffman_bitlen_count
2655 , _fix_block_literal_huffman_to_deflate_code, 7
2656 , _fix_block_dist_huffman_bitlen_count
2657 , _fix_block_dist_huffman_to_deflate_code, 5)
2658end
2659
2660-- Decompress a dynamic block
2661-- @param state decompression state that will be modified by this function.
2662-- @return 0 if success, other value if fails.
2663local function DecompressDynamicBlock(state)
2664 local ReadBits, Decode = state.ReadBits, state.Decode
2665 local nlen = ReadBits(5) + 257
2666 local ndist = ReadBits(5) + 1
2667 local ncode = ReadBits(4) + 4
2668 if nlen > 286 or ndist > 30 then
2669 -- dynamic block code description: too many length or distance codes
2670 return -3
2671 end
2672
2673 local rle_codes_huffman_bitlens = {}
2674
2675 for i = 1, ncode do
2676 rle_codes_huffman_bitlens[_rle_codes_huffman_bitlen_order[i]] =
2677 ReadBits(3)
2678 end
2679
2680 local rle_codes_err, rle_codes_huffman_bitlen_counts,
2681 rle_codes_huffman_symbols, rle_codes_huffman_min_bitlen =
2682 GetHuffmanForDecode(rle_codes_huffman_bitlens, 18, 7)
2683 if rle_codes_err ~= 0 then -- Require complete code set here
2684 -- dynamic block code description: code lengths codes incomplete
2685 return -4
2686 end
2687
2688 local lcodes_huffman_bitlens = {}
2689 local dcodes_huffman_bitlens = {}
2690 -- Read length/literal and distance code length tables
2691 local index = 0
2692 while index < nlen + ndist do
2693 local symbol -- Decoded value
2694 local bitlen -- Last length to repeat
2695
2696 symbol = Decode(rle_codes_huffman_bitlen_counts
2697 , rle_codes_huffman_symbols, rle_codes_huffman_min_bitlen)
2698
2699 if symbol < 0 then
2700 return symbol -- Invalid symbol
2701 elseif symbol < 16 then
2702 if index < nlen then
2703 lcodes_huffman_bitlens[index] = symbol
2704 else
2705 dcodes_huffman_bitlens[index-nlen] = symbol
2706 end
2707 index = index + 1
2708 else
2709 bitlen = 0
2710 if symbol == 16 then
2711 if index == 0 then
2712 -- dynamic block code description: repeat lengths
2713 -- with no first length
2714 return -5
2715 end
2716 if index-1 < nlen then
2717 bitlen = lcodes_huffman_bitlens[index-1]
2718 else
2719 bitlen = dcodes_huffman_bitlens[index-nlen-1]
2720 end
2721 symbol = 3 + ReadBits(2)
2722 elseif symbol == 17 then -- Repeat zero 3..10 times
2723 symbol = 3 + ReadBits(3)
2724 else -- == 18, repeat zero 11.138 times
2725 symbol = 11 + ReadBits(7)
2726 end
2727 if index + symbol > nlen + ndist then
2728 -- dynamic block code description:
2729 -- repeat more than specified lengths
2730 return -6
2731 end
2732 while symbol > 0 do -- Repeat last or zero symbol times
2733 symbol = symbol - 1
2734 if index < nlen then
2735 lcodes_huffman_bitlens[index] = bitlen
2736 else
2737 dcodes_huffman_bitlens[index-nlen] = bitlen
2738 end
2739 index = index + 1
2740 end
2741 end
2742 end
2743
2744 if (lcodes_huffman_bitlens[256] or 0) == 0 then
2745 -- dynamic block code description: missing end-of-block code
2746 return -9
2747 end
2748
2749 local lcodes_err, lcodes_huffman_bitlen_counts
2750 , lcodes_huffman_symbols, lcodes_huffman_min_bitlen =
2751 GetHuffmanForDecode(lcodes_huffman_bitlens, nlen-1, 15)
2752 --dynamic block code description: invalid literal/length code lengths,
2753 -- Incomplete code ok only for single length 1 code
2754 if (lcodes_err ~=0 and (lcodes_err < 0
2755 or nlen ~= (lcodes_huffman_bitlen_counts[0] or 0)
2756 +(lcodes_huffman_bitlen_counts[1] or 0))) then
2757 return -7
2758 end
2759
2760 local dcodes_err, dcodes_huffman_bitlen_counts
2761 , dcodes_huffman_symbols, dcodes_huffman_min_bitlen =
2762 GetHuffmanForDecode(dcodes_huffman_bitlens, ndist-1, 15)
2763 -- dynamic block code description: invalid distance code lengths,
2764 -- Incomplete code ok only for single length 1 code
2765 if (dcodes_err ~=0 and (dcodes_err < 0
2766 or ndist ~= (dcodes_huffman_bitlen_counts[0] or 0)
2767 + (dcodes_huffman_bitlen_counts[1] or 0))) then
2768 return -8
2769 end
2770
2771 -- Build buffman table for literal/length codes
2772 return DecodeUntilEndOfBlock(state, lcodes_huffman_bitlen_counts
2773 , lcodes_huffman_symbols, lcodes_huffman_min_bitlen
2774 , dcodes_huffman_bitlen_counts, dcodes_huffman_symbols
2775 , dcodes_huffman_min_bitlen)
2776end
2777
2778-- Decompress a deflate stream
2779-- @param state: a decompression state
2780-- @return the decompressed string if succeeds. nil if fails.
2781local function Inflate(state)
2782 local ReadBits = state.ReadBits
2783
2784 local is_last_block
2785 while not is_last_block do
2786 is_last_block = (ReadBits(1) == 1)
2787 local block_type = ReadBits(2)
2788 local status
2789 if block_type == 0 then
2790 status = DecompressStoreBlock(state)
2791 elseif block_type == 1 then
2792 status = DecompressFixBlock(state)
2793 elseif block_type == 2 then
2794 status = DecompressDynamicBlock(state)
2795 else
2796 return nil, -1 -- invalid block type (type == 3)
2797 end
2798 if status ~= 0 then
2799 return nil, status
2800 end
2801 end
2802
2803 state.result_buffer[#state.result_buffer+1] =
2804 table_concat(state.buffer, "", 1, state.buffer_size)
2805 local result = table_concat(state.result_buffer)
2806 return result
2807end
2808
2809-- @see LibDeflate:DecompressDeflate(str)
2810-- @see LibDeflate:DecompressDeflateWithDict(str, dictionary)
2811local function DecompressDeflateInternal(str, dictionary)
2812 local state = CreateDecompressState(str, dictionary)
2813 local result, status = Inflate(state)
2814 if not result then
2815 return nil, status
2816 end
2817
2818 local bitlen_left = state.ReaderBitlenLeft()
2819 local bytelen_left = (bitlen_left - bitlen_left % 8) / 8
2820 return result, bytelen_left
2821end
2822
2823-- @see LibDeflate:DecompressZlib(str)
2824-- @see LibDeflate:DecompressZlibWithDict(str)
2825local function DecompressZlibInternal(str, dictionary)
2826 local state = CreateDecompressState(str, dictionary)
2827 local ReadBits = state.ReadBits
2828
2829 local CMF = ReadBits(8)
2830 if state.ReaderBitlenLeft() < 0 then
2831 return nil, 2 -- available inflate data did not terminate
2832 end
2833 local CM = CMF % 16
2834 local CINFO = (CMF - CM) / 16
2835 if CM ~= 8 then
2836 return nil, -12 -- invalid compression method
2837 end
2838 if CINFO > 7 then
2839 return nil, -13 -- invalid window size
2840 end
2841
2842 local FLG = ReadBits(8)
2843 if state.ReaderBitlenLeft() < 0 then
2844 return nil, 2 -- available inflate data did not terminate
2845 end
2846 if (CMF*256+FLG)%31 ~= 0 then
2847 return nil, -14 -- invalid header checksum
2848 end
2849
2850 local FDIST = ((FLG-FLG%32)/32 % 2)
2851 local FLEVEL = ((FLG-FLG%64)/64 % 4) -- luacheck: ignore FLEVEL
2852
2853 if FDIST == 1 then
2854 if not dictionary then
2855 return nil, -16 -- need dictonary, but dictionary is not provided.
2856 end
2857 local byte3 = ReadBits(8)
2858 local byte2 = ReadBits(8)
2859 local byte1 = ReadBits(8)
2860 local byte0 = ReadBits(8)
2861 local actual_adler32 = byte3*16777216+byte2*65536+byte1*256+byte0
2862 if state.ReaderBitlenLeft() < 0 then
2863 return nil, 2 -- available inflate data did not terminate
2864 end
2865 if not IsEqualAdler32(actual_adler32, dictionary.adler32) then
2866 return nil, -17 -- dictionary adler32 does not match
2867 end
2868 end
2869 local result, status = Inflate(state)
2870 if not result then
2871 return nil, status
2872 end
2873 state.SkipToByteBoundary()
2874
2875 local adler_byte0 = ReadBits(8)
2876 local adler_byte1 = ReadBits(8)
2877 local adler_byte2 = ReadBits(8)
2878 local adler_byte3 = ReadBits(8)
2879 if state.ReaderBitlenLeft() < 0 then
2880 return nil, 2 -- available inflate data did not terminate
2881 end
2882
2883 local adler32_expected = adler_byte0*16777216
2884 + adler_byte1*65536 + adler_byte2*256 + adler_byte3
2885 local adler32_actual = LibDeflate:Adler32(result)
2886 if not IsEqualAdler32(adler32_expected, adler32_actual) then
2887 return nil, -15 -- Adler32 checksum does not match
2888 end
2889
2890 local bitlen_left = state.ReaderBitlenLeft()
2891 local bytelen_left = (bitlen_left - bitlen_left % 8) / 8
2892 return result, bytelen_left
2893end
2894
2895--- Decompress a raw deflate compressed data.
2896-- @param str [string] The data to be decompressed.
2897-- @return [string/nil] If the decompression succeeds, return the decompressed
2898-- data. If the decompression fails, return nil. You should check if this return
2899-- value is non-nil to know if the decompression succeeds.
2900-- @return [integer] If the decompression succeeds, return the number of
2901-- unprocessed bytes in the input compressed data. This return value is a
2902-- positive integer if the input data is a valid compressed data appended by an
2903-- arbitary non-empty string. This return value is 0 if the input data does not
2904-- contain any extra bytes.<br>
2905-- If the decompression fails (The first return value of this function is nil),
2906-- this return value is undefined.
2907-- @see LibDeflate:CompressDeflate
2908function LibDeflate:DecompressDeflate(str)
2909 local arg_valid, arg_err = IsValidArguments(str)
2910 if not arg_valid then
2911 error(("Usage: LibDeflate:DecompressDeflate(str): "
2912 ..arg_err), 2)
2913 end
2914 return DecompressDeflateInternal(str)
2915end
2916
2917--- Decompress a raw deflate compressed data with a preset dictionary.
2918-- @param str [string] The data to be decompressed.
2919-- @param dictionary [table] The preset dictionary used by
2920-- LibDeflate:CompressDeflateWithDict when the compressed data is produced.
2921-- Decompression and compression must use the same dictionary.
2922-- Otherwise wrong decompressed data could be produced without generating any
2923-- error.
2924-- @return [string/nil] If the decompression succeeds, return the decompressed
2925-- data. If the decompression fails, return nil. You should check if this return
2926-- value is non-nil to know if the decompression succeeds.
2927-- @return [integer] If the decompression succeeds, return the number of
2928-- unprocessed bytes in the input compressed data. This return value is a
2929-- positive integer if the input data is a valid compressed data appended by an
2930-- arbitary non-empty string. This return value is 0 if the input data does not
2931-- contain any extra bytes.<br>
2932-- If the decompression fails (The first return value of this function is nil),
2933-- this return value is undefined.
2934-- @see LibDeflate:CompressDeflateWithDict
2935function LibDeflate:DecompressDeflateWithDict(str, dictionary)
2936 local arg_valid, arg_err = IsValidArguments(str, true, dictionary)
2937 if not arg_valid then
2938 error(("Usage: LibDeflate:DecompressDeflateWithDict(str, dictionary): "
2939 ..arg_err), 2)
2940 end
2941 return DecompressDeflateInternal(str, dictionary)
2942end
2943
2944--- Decompress a zlib compressed data.
2945-- @param str [string] The data to be decompressed
2946-- @return [string/nil] If the decompression succeeds, return the decompressed
2947-- data. If the decompression fails, return nil. You should check if this return
2948-- value is non-nil to know if the decompression succeeds.
2949-- @return [integer] If the decompression succeeds, return the number of
2950-- unprocessed bytes in the input compressed data. This return value is a
2951-- positive integer if the input data is a valid compressed data appended by an
2952-- arbitary non-empty string. This return value is 0 if the input data does not
2953-- contain any extra bytes.<br>
2954-- If the decompression fails (The first return value of this function is nil),
2955-- this return value is undefined.
2956-- @see LibDeflate:CompressZlib
2957function LibDeflate:DecompressZlib(str)
2958 local arg_valid, arg_err = IsValidArguments(str)
2959 if not arg_valid then
2960 error(("Usage: LibDeflate:DecompressZlib(str): "
2961 ..arg_err), 2)
2962 end
2963 return DecompressZlibInternal(str)
2964end
2965
2966--- Decompress a zlib compressed data with a preset dictionary.
2967-- @param str [string] The data to be decompressed
2968-- @param dictionary [table] The preset dictionary used by
2969-- LibDeflate:CompressDeflateWithDict when the compressed data is produced.
2970-- Decompression and compression must use the same dictionary.
2971-- Otherwise wrong decompressed data could be produced without generating any
2972-- error.
2973-- @return [string/nil] If the decompression succeeds, return the decompressed
2974-- data. If the decompression fails, return nil. You should check if this return
2975-- value is non-nil to know if the decompression succeeds.
2976-- @return [integer] If the decompression succeeds, return the number of
2977-- unprocessed bytes in the input compressed data. This return value is a
2978-- positive integer if the input data is a valid compressed data appended by an
2979-- arbitary non-empty string. This return value is 0 if the input data does not
2980-- contain any extra bytes.<br>
2981-- If the decompression fails (The first return value of this function is nil),
2982-- this return value is undefined.
2983-- @see LibDeflate:CompressZlibWithDict
2984function LibDeflate:DecompressZlibWithDict(str, dictionary)
2985 local arg_valid, arg_err = IsValidArguments(str, true, dictionary)
2986 if not arg_valid then
2987 error(("Usage: LibDeflate:DecompressZlibWithDict(str, dictionary): "
2988 ..arg_err), 2)
2989 end
2990 return DecompressZlibInternal(str, dictionary)
2991end
2992
2993-- Calculate the huffman code of fixed block
2994do
2995 _fix_block_literal_huffman_bitlen = {}
2996 for sym=0, 143 do
2997 _fix_block_literal_huffman_bitlen[sym] = 8
2998 end
2999 for sym=144, 255 do
3000 _fix_block_literal_huffman_bitlen[sym] = 9
3001 end
3002 for sym=256, 279 do
3003 _fix_block_literal_huffman_bitlen[sym] = 7
3004 end
3005 for sym=280, 287 do
3006 _fix_block_literal_huffman_bitlen[sym] = 8
3007 end
3008
3009 _fix_block_dist_huffman_bitlen = {}
3010 for dist=0, 31 do
3011 _fix_block_dist_huffman_bitlen[dist] = 5
3012 end
3013 local status
3014 status, _fix_block_literal_huffman_bitlen_count
3015 , _fix_block_literal_huffman_to_deflate_code =
3016 GetHuffmanForDecode(_fix_block_literal_huffman_bitlen, 287, 9)
3017 assert(status == 0)
3018 status, _fix_block_dist_huffman_bitlen_count,
3019 _fix_block_dist_huffman_to_deflate_code =
3020 GetHuffmanForDecode(_fix_block_dist_huffman_bitlen, 31, 5)
3021 assert(status == 0)
3022
3023 _fix_block_literal_huffman_code =
3024 GetHuffmanCodeFromBitlen(_fix_block_literal_huffman_bitlen_count
3025 , _fix_block_literal_huffman_bitlen, 287, 9)
3026 _fix_block_dist_huffman_code =
3027 GetHuffmanCodeFromBitlen(_fix_block_dist_huffman_bitlen_count
3028 , _fix_block_dist_huffman_bitlen, 31, 5)
3029end
3030
3031-- Prefix encoding algorithm
3032-- Credits to LibCompress.
3033-- The code has been rewritten by the author of LibDeflate.
3034------------------------------------------------------------------------------
3035
3036-- to be able to match any requested byte value, the search
3037-- string must be preprocessed characters to escape with %:
3038-- ( ) . % + - * ? [ ] ^ $
3039-- "illegal" byte values:
3040-- 0 is replaces %z
3041local _gsub_escape_table = {
3042 ["\000"] = "%z", ["("] = "%(", [")"] = "%)", ["."] = "%.",
3043 ["%"] = "%%", ["+"] = "%+", ["-"] = "%-", ["*"] = "%*",
3044 ["?"] = "%?", ["["] = "%[", ["]"] = "%]", ["^"] = "%^",
3045 ["$"] = "%$",
3046}
3047
3048local function escape_for_gsub(str)
3049 return str:gsub("([%z%(%)%.%%%+%-%*%?%[%]%^%$])", _gsub_escape_table)
3050end
3051
3052--- Create a custom codec with encoder and decoder. <br>
3053-- This codec is used to convert an input string to make it not contain
3054-- some specific bytes.
3055-- This created codec and the parameters of this function do NOT take
3056-- localization into account. One byte (0-255) in the string is exactly one
3057-- character (0-255).
3058-- Credits to LibCompress.
3059-- The code has been rewritten by the author of LibDeflate. <br>
3060-- @param reserved_chars [string] The created encoder will ensure encoded
3061-- data does not contain any single character in reserved_chars. This parameter
3062-- should be non-empty.
3063-- @param escape_chars [string] The escape character(s) used in the created
3064-- codec. The codec converts any character included in reserved\_chars /
3065-- escape\_chars / map\_chars to (one escape char + one character not in
3066-- reserved\_chars / escape\_chars / map\_chars).
3067-- You usually only need to provide a length-1 string for this parameter.
3068-- Length-2 string is only needed when
3069-- reserved\_chars + escape\_chars + map\_chars is longer than 127.
3070-- This parameter should be non-empty.
3071-- @param map_chars [string] The created encoder will map every
3072-- reserved\_chars:sub(i, i) (1 <= i <= #map\_chars) to map\_chars:sub(i, i).
3073-- This parameter CAN be empty string.
3074-- @return [table/nil] If the codec cannot be created, return nil.<br>
3075-- If the codec can be created according to the given
3076-- parameters, return the codec, which is a encode/decode table.
3077-- The table contains two functions: <br>
3078-- t:Encode(str) returns the encoded string. <br>
3079-- t:Decode(str) returns the decoded string if succeeds. nil if fails.
3080-- @return [nil/string] If the codec is successfully created, return nil.
3081-- If not, return a string that describes the reason why the codec cannot be
3082-- created.
3083-- @usage
3084-- -- Create an encoder/decoder that maps all "\000" to "\003",
3085-- -- and escape "\001" (and "\002" and "\003") properly
3086-- local codec = LibDeflate:CreateCodec("\000\001", "\002", "\003")
3087--
3088-- local encoded = codec:Encode(SOME_STRING)
3089-- -- "encoded" does not contain "\000" or "\001"
3090-- local decoded = codec:Decode(encoded)
3091-- -- assert(decoded == SOME_STRING)
3092function LibDeflate:CreateCodec(reserved_chars, escape_chars
3093 , map_chars)
3094 if type(reserved_chars) ~= "string"
3095 or type(escape_chars) ~= "string"
3096 or type(map_chars) ~= "string" then
3097 error(
3098 "Usage: LibDeflate:CreateCodec(reserved_chars,"
3099 .." escape_chars, map_chars):"
3100 .." All arguments must be string.", 2)
3101 end
3102
3103 if escape_chars == "" then
3104 return nil, "No escape characters supplied."
3105 end
3106 if #reserved_chars < #map_chars then
3107 return nil, "The number of reserved characters must be"
3108 .." at least as many as the number of mapped chars."
3109 end
3110 if reserved_chars == "" then
3111 return nil, "No characters to encode."
3112 end
3113
3114 local encode_bytes = reserved_chars..escape_chars..map_chars
3115 -- build list of bytes not available as a suffix to a prefix byte
3116 local taken = {}
3117 for i = 1, #encode_bytes do
3118 local byte = string_byte(encode_bytes, i, i)
3119 if taken[byte] then
3120 return nil, "There must be no duplicate characters in the"
3121 .." concatenation of reserved_chars, escape_chars and"
3122 .." map_chars."
3123 end
3124 taken[byte] = true
3125 end
3126
3127 local decode_patterns = {}
3128 local decode_repls = {}
3129
3130 -- the encoding can be a single gsub
3131 -- , but the decoding can require multiple gsubs
3132 local encode_search = {}
3133 local encode_translate = {}
3134
3135 -- map single byte to single byte
3136 if #map_chars > 0 then
3137 local decode_search = {}
3138 local decode_translate = {}
3139 for i = 1, #map_chars do
3140 local from = string_sub(reserved_chars, i, i)
3141 local to = string_sub(map_chars, i, i)
3142 encode_translate[from] = to
3143 encode_search[#encode_search+1] = from
3144 decode_translate[to] = from
3145 decode_search[#decode_search+1] = to
3146 end
3147 decode_patterns[#decode_patterns+1] =
3148 "([".. escape_for_gsub(table_concat(decode_search)).."])"
3149 decode_repls[#decode_repls+1] = decode_translate
3150 end
3151
3152 local escape_char_index = 1
3153 local escape_char = string_sub(escape_chars
3154 , escape_char_index, escape_char_index)
3155 -- map single byte to double-byte
3156 local r = 0 -- suffix char value to the escapeChar
3157
3158 local decode_search = {}
3159 local decode_translate = {}
3160 for i = 1, #encode_bytes do
3161 local c = string_sub(encode_bytes, i, i)
3162 if not encode_translate[c] then
3163 while r >= 256 or taken[r] do
3164 r = r + 1
3165 if r > 255 then -- switch to next escapeChar
3166 decode_patterns[#decode_patterns+1] =
3167 escape_for_gsub(escape_char)
3168 .."(["
3169 .. escape_for_gsub(table_concat(decode_search)).."])"
3170 decode_repls[#decode_repls+1] = decode_translate
3171
3172 escape_char_index = escape_char_index + 1
3173 escape_char = string_sub(escape_chars, escape_char_index
3174 , escape_char_index)
3175 r = 0
3176 decode_search = {}
3177 decode_translate = {}
3178
3179 if not escape_char or escape_char == "" then
3180 -- actually I don't need to check
3181 -- "not ecape_char", but what if Lua changes
3182 -- the behavior of string.sub() in the future?
3183 -- we are out of escape chars and we need more!
3184 return nil, "Out of escape characters."
3185 end
3186 end
3187 end
3188
3189 local char_r = _byte_to_char[r]
3190 encode_translate[c] = escape_char..char_r
3191 encode_search[#encode_search+1] = c
3192 decode_translate[char_r] = c
3193 decode_search[#decode_search+1] = char_r
3194 r = r + 1
3195 end
3196 if i == #encode_bytes then
3197 decode_patterns[#decode_patterns+1] =
3198 escape_for_gsub(escape_char).."(["
3199 .. escape_for_gsub(table_concat(decode_search)).."])"
3200 decode_repls[#decode_repls+1] = decode_translate
3201 end
3202 end
3203
3204 local codec = {}
3205
3206 local encode_pattern = "(["
3207 .. escape_for_gsub(table_concat(encode_search)).."])"
3208 local encode_repl = encode_translate
3209
3210 function codec:Encode(str)
3211 if type(str) ~= "string" then
3212 error(("Usage: codec:Encode(str):"
3213 .." 'str' - string expected got '%s'."):format(type(str)), 2)
3214 end
3215 return string_gsub(str, encode_pattern, encode_repl)
3216 end
3217
3218 local decode_tblsize = #decode_patterns
3219 local decode_fail_pattern = "(["
3220 .. escape_for_gsub(reserved_chars).."])"
3221
3222 function codec:Decode(str)
3223 if type(str) ~= "string" then
3224 error(("Usage: codec:Decode(str):"
3225 .." 'str' - string expected got '%s'."):format(type(str)), 2)
3226 end
3227 if string_find(str, decode_fail_pattern) then
3228 return nil
3229 end
3230 for i = 1, decode_tblsize do
3231 str = string_gsub(str, decode_patterns[i], decode_repls[i])
3232 end
3233 return str
3234 end
3235
3236 return codec
3237end
3238
3239local _addon_channel_codec
3240
3241
3242-- Credits to WeakAuras2 and Galmok for the 6 bit encoding algorithm.
3243-- The code has been rewritten by the author of LibDeflate.
3244-- The result of encoding will be 25% larger than the
3245-- origin string, but every single byte of the encoding result will be
3246-- printable characters as the following.
3247local _byte_to_6bit_char = {
3248 [0]="a", "b", "c", "d", "e", "f", "g", "h",
3249 "i", "j", "k", "l", "m", "n", "o", "p",
3250 "q", "r", "s", "t", "u", "v", "w", "x",
3251 "y", "z", "A", "B", "C", "D", "E", "F",
3252 "G", "H", "I", "J", "K", "L", "M", "N",
3253 "O", "P", "Q", "R", "S", "T", "U", "V",
3254 "W", "X", "Y", "Z", "0", "1", "2", "3",
3255 "4", "5", "6", "7", "8", "9", "(", ")",
3256}
3257
3258local _6bit_to_byte = {
3259 [97]=0,[98]=1,[99]=2,[100]=3,[101]=4,[102]=5,[103]=6,[104]=7,
3260 [105]=8,[106]=9,[107]=10,[108]=11,[109]=12,[110]=13,[111]=14,[112]=15,
3261 [113]=16,[114]=17,[115]=18,[116]=19,[117]=20,[118]=21,[119]=22,[120]=23,
3262 [121]=24,[122]=25,[65]=26,[66]=27,[67]=28,[68]=29,[69]=30,[70]=31,
3263 [71]=32,[72]=33,[73]=34,[74]=35,[75]=36,[76]=37,[77]=38,[78]=39,
3264 [79]=40,[80]=41,[81]=42,[82]=43,[83]=44,[84]=45,[85]=46,[86]=47,
3265 [87]=48,[88]=49,[89]=50,[90]=51,[48]=52,[49]=53,[50]=54,[51]=55,
3266 [52]=56,[53]=57,[54]=58,[55]=59,[56]=60,[57]=61,[40]=62,[41]=63,
3267}
3268
3269--- Encode the string to make it printable. <br>
3270--
3271-- Credit to WeakAuras2, this function is equivalant to the implementation
3272-- it is using right now. <br>
3273-- The code has been rewritten by the author of LibDeflate. <br>
3274-- The encoded string will be 25% larger than the origin string. However, every
3275-- single byte of the encoded string will be one of 64 printable ASCII
3276-- characters, which are can be easier copied, pasted and displayed.
3277-- (26 lowercase letters, 26 uppercase letters, 10 numbers digits,
3278-- left parenthese, or right parenthese)
3279-- @param str [string] The string to be encoded.
3280-- @return [string] The encoded string.
3281function LibDeflate:EncodeForPrint(str)
3282 if type(str) ~= "string" then
3283 error(("Usage: LibDeflate:EncodeForPrint(str):"
3284 .." 'str' - string expected got '%s'."):format(type(str)), 2)
3285 end
3286 local strlen = #str
3287 local strlenMinus2 = strlen - 2
3288 local i = 1
3289 local buffer = {}
3290 local buffer_size = 0
3291 while i <= strlenMinus2 do
3292 local x1, x2, x3 = string_byte(str, i, i+2)
3293 i = i + 3
3294 local cache = x1+x2*256+x3*65536
3295 local b1 = cache % 64
3296 cache = (cache - b1) / 64
3297 local b2 = cache % 64
3298 cache = (cache - b2) / 64
3299 local b3 = cache % 64
3300 local b4 = (cache - b3) / 64
3301 buffer_size = buffer_size + 1
3302 buffer[buffer_size] =
3303 _byte_to_6bit_char[b1].._byte_to_6bit_char[b2]
3304 .._byte_to_6bit_char[b3].._byte_to_6bit_char[b4]
3305 end
3306
3307 local cache = 0
3308 local cache_bitlen = 0
3309 while i <= strlen do
3310 local x = string_byte(str, i, i)
3311 cache = cache + x * _pow2[cache_bitlen]
3312 cache_bitlen = cache_bitlen + 8
3313 i = i + 1
3314 end
3315 while cache_bitlen > 0 do
3316 local bit6 = cache % 64
3317 buffer_size = buffer_size + 1
3318 buffer[buffer_size] = _byte_to_6bit_char[bit6]
3319 cache = (cache - bit6) / 64
3320 cache_bitlen = cache_bitlen - 6
3321 end
3322
3323 return table_concat(buffer)
3324end
3325
3326--- Decode the printable string produced by LibDeflate:EncodeForPrint.
3327-- "str" will have its prefixed and trailing control characters or space
3328-- removed before it is decoded, so it is easier to use if "str" comes form
3329-- user copy and paste with some prefixed or trailing spaces.
3330-- Then decode fails if the string contains any characters cant be produced by
3331-- LibDeflate:EncodeForPrint. That means, decode fails if the string contains a
3332-- characters NOT one of 26 lowercase letters, 26 uppercase letters,
3333-- 10 numbers digits, left parenthese, or right parenthese.
3334-- @param str [string] The string to be decoded
3335-- @return [string/nil] The decoded string if succeeds. nil if fails.
3336function LibDeflate:DecodeForPrint(str)
3337 if type(str) ~= "string" then
3338 error(("Usage: LibDeflate:DecodeForPrint(str):"
3339 .." 'str' - string expected got '%s'."):format(type(str)), 2)
3340 end
3341 str = str:gsub("^[%c ]+", "")
3342 str = str:gsub("[%c ]+$", "")
3343
3344 local strlen = #str
3345 if strlen == 1 then
3346 return nil
3347 end
3348 local strlenMinus3 = strlen - 3
3349 local i = 1
3350 local buffer = {}
3351 local buffer_size = 0
3352 while i <= strlenMinus3 do
3353 local x1, x2, x3, x4 = string_byte(str, i, i+3)
3354 x1 = _6bit_to_byte[x1]
3355 x2 = _6bit_to_byte[x2]
3356 x3 = _6bit_to_byte[x3]
3357 x4 = _6bit_to_byte[x4]
3358 if not (x1 and x2 and x3 and x4) then
3359 return nil
3360 end
3361 i = i + 4
3362 local cache = x1+x2*64+x3*4096+x4*262144
3363 local b1 = cache % 256
3364 cache = (cache - b1) / 256
3365 local b2 = cache % 256
3366 local b3 = (cache - b2) / 256
3367 buffer_size = buffer_size + 1
3368 buffer[buffer_size] =
3369 _byte_to_char[b1].._byte_to_char[b2].._byte_to_char[b3]
3370 end
3371
3372 local cache = 0
3373 local cache_bitlen = 0
3374 while i <= strlen do
3375 local x = string_byte(str, i, i)
3376 x = _6bit_to_byte[x]
3377 if not x then
3378 return nil
3379 end
3380 cache = cache + x * _pow2[cache_bitlen]
3381 cache_bitlen = cache_bitlen + 6
3382 i = i + 1
3383 end
3384
3385 while cache_bitlen >= 8 do
3386 local byte = cache % 256
3387 buffer_size = buffer_size + 1
3388 buffer[buffer_size] = _byte_to_char[byte]
3389 cache = (cache - byte) / 256
3390 cache_bitlen = cache_bitlen - 8
3391 end
3392
3393 return table_concat(buffer)
3394end
3395
3396local function InternalClearCache()
3397 _addon_channel_codec = nil
3398end
3399
3400-- For test. Don't use the functions in this table for real application.
3401-- Stuffs in this table is subject to change.
3402LibDeflate.internals = {
3403 LoadStringToTable = LoadStringToTable,
3404 IsValidDictionary = IsValidDictionary,
3405 IsEqualAdler32 = IsEqualAdler32,
3406 _byte_to_6bit_char = _byte_to_6bit_char,
3407 _6bit_to_byte = _6bit_to_byte,
3408 InternalClearCache = InternalClearCache,
3409}
3410
3411return Compression