· 6 years ago · Apr 17, 2020, 11:50 AM
1--[[--
2LibDeflate 1.0.0-release <br>
3Pure Lua compressor and decompressor with high compression ratio using
4DEFLATE/zlib format.
5
6@file LibDeflate.lua
7@author Haoqian He (Github: SafeteeWoW; World of Warcraft: Safetyy-Illidan(US))
8@copyright LibDeflate <2018> Haoqian He
9@license GNU General Public License Version 3 or later
10
11This library is implemented according to the following specifications. <br>
12Report a bug if LibDeflate is not fully compliant with those specs. <br>
13Both compressors and decompressors have been implemented in the library.<br>
141. RFC1950: DEFLATE Compressed Data Format Specification version 1.3 <br>
15https://tools.ietf.org/html/rfc1951 <br>
162. RFC1951: ZLIB Compressed Data Format Specification version 3.3 <br>
17https://tools.ietf.org/html/rfc1950 <br>
18
19This library requires Lua 5.1/5.2/5.3 interpreter or LuaJIT v2.0+. <br>
20This library does not have any dependencies. <br>
21<br>
22This file "LibDeflate.lua" is the only source file of
23the library. <br>
24Submit suggestions or report bugs to
25https://github.com/safeteeWow/LibDeflate/issues
26]]
27
28--[[
29This program is free software: you can redistribute it and/or modify
30it under the terms of the GNU General Public License as published by
31the Free Software Foundation, either version 3 of the License, or
32any later version.
33
34This program is distributed in the hope that it will be useful,
35but WITHOUT ANY WARRANTY; without even the implied warranty of
36MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
37GNU General Public License for more details.
38
39You should have received a copy of the GNU General Public License
40along with this program. If not, see https://www.gnu.org/licenses/.
41
42Credits:
431. zlib, by Jean-loup Gailly (compression) and Mark Adler (decompression).
44 http://www.zlib.net/
45 Licensed under zlib License. http://www.zlib.net/zlib_license.html
46 For the compression algorithm.
472. puff, by Mark Adler. https://github.com/madler/zlib/tree/master/contrib/puff
48 Licensed under zlib License. http://www.zlib.net/zlib_license.html
49 For the decompression algorithm.
503. LibCompress, by jjsheets and Galmok of European Stormrage (Horde)
51 https://www.wowace.com/projects/libcompress
52 Licensed under GPLv2.
53 https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
54 For the code to create customized codec.
554. WeakAuras2,
56 https://github.com/WeakAuras/WeakAuras2
57 Licensed under GPLv2.
58 For the 6bit encoding and decoding.
59]]
60
61--[[
62 Curseforge auto-packaging replacements:
63
64 Project Date: @project-date-iso@
65 Project Hash: @project-hash@
66 Project Version: @project-version@
67--]]
68
69local LibDeflate
70
71do
72 -- Semantic version. all lowercase.
73 -- Suffix can be alpha1, alpha2, beta1, beta2, rc1, rc2, etc.
74 -- NOTE: Two version numbers needs to modify.
75 -- 1. On the top of LibDeflate.lua
76 -- 2. HERE
77 local _VERSION = "1.0.0-release"
78
79 local _COPYRIGHT =
80 "LibDeflate ".._VERSION
81 .." Copyright (C) 2018 Haoqian He."
82 .." License GPLv3+: GNU GPL version 3 or later"
83
84 -- Register in the World of Warcraft library "LibStub" if detected.
85 if LibStub then
86 local MAJOR, MINOR = "LibDeflate", -1
87 -- When MAJOR is changed, I should name it as LibDeflate2
88 local lib, minor = LibStub:GetLibrary(MAJOR, true)
89 if lib and minor and minor >= MINOR then -- No need to update.
90 return lib
91 else -- Update or first time register
92 LibDeflate = LibStub:NewLibrary(MAJOR, _VERSION)
93 -- NOTE: It is important that new version has implemented
94 -- all exported APIs and tables in the old version,
95 -- so the old library is fully garbage collected,
96 -- and we 100% ensure the backward compatibility.
97 end
98 else -- "LibStub" is not detected.
99 LibDeflate = {}
100 end
101
102 LibDeflate._VERSION = _VERSION
103 LibDeflate._COPYRIGHT = _COPYRIGHT
104end
105
106-- localize Lua api for faster access.
107local assert = assert
108local error = error
109local pairs = pairs
110local string_byte = string.byte
111local string_char = string.char
112local string_find = string.find
113local string_gsub = string.gsub
114local string_sub = string.sub
115local table_concat = table.concat
116local table_sort = table.sort
117local tostring = tostring
118local type = type
119
120-- Converts i to 2^i, (0<=i<=32)
121-- This is used to implement bit left shift and bit right shift.
122-- "x >> y" in C: "(x-x%_pow2[y])/_pow2[y]" in Lua
123-- "x << y" in C: "x*_pow2[y]" in Lua
124local _pow2 = {}
125
126-- Converts any byte to a character, (0<=byte<=255)
127local _byte_to_char = {}
128
129-- _reverseBitsTbl[len][val] stores the bit reverse of
130-- the number with bit length "len" and value "val"
131-- For example, decimal number 6 with bits length 5 is binary 00110
132-- It's reverse is binary 01100,
133-- which is decimal 12 and 12 == _reverseBitsTbl[5][6]
134-- 1<=len<=9, 0<=val<=2^len-1
135-- The reason for 1<=len<=9 is that the max of min bitlen of huffman code
136-- of a huffman alphabet is 9?
137local _reverse_bits_tbl = {}
138
139-- Convert a LZ77 length (3<=len<=258) to
140-- a deflate literal/LZ77_length code (257<=code<=285)
141local _length_to_deflate_code = {}
142
143-- convert a LZ77 length (3<=len<=258) to
144-- a deflate literal/LZ77_length code extra bits.
145local _length_to_deflate_extra_bits = {}
146
147-- Convert a LZ77 length (3<=len<=258) to
148-- a deflate literal/LZ77_length code extra bit length.
149local _length_to_deflate_extra_bitlen = {}
150
151-- Convert a small LZ77 distance (1<=dist<=256) to a deflate code.
152local _dist256_to_deflate_code = {}
153
154-- Convert a small LZ77 distance (1<=dist<=256) to
155-- a deflate distance code extra bits.
156local _dist256_to_deflate_extra_bits = {}
157
158-- Convert a small LZ77 distance (1<=dist<=256) to
159-- a deflate distance code extra bit length.
160local _dist256_to_deflate_extra_bitlen = {}
161
162-- Convert a literal/LZ77_length deflate code to LZ77 base length
163-- The key of the table is (code - 256), 257<=code<=285
164local _literal_deflate_code_to_base_len = {
165 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
166 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258,
167}
168
169-- Convert a literal/LZ77_length deflate code to base LZ77 length extra bits
170-- The key of the table is (code - 256), 257<=code<=285
171local _literal_deflate_code_to_extra_bitlen = {
172 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
173 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0,
174}
175
176-- Convert a distance deflate code to base LZ77 distance. (0<=code<=29)
177local _dist_deflate_code_to_base_dist = {
178 [0] = 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
179 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
180 8193, 12289, 16385, 24577,
181}
182
183-- Convert a distance deflate code to LZ77 bits length. (0<=code<=29)
184local _dist_deflate_code_to_extra_bitlen = {
185 [0] = 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
186 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
187}
188
189-- The code order of the first huffman header in the dynamic deflate block.
190-- See the page 12 of RFC1951
191local _rle_codes_huffman_bitlen_order = {16, 17, 18,
192 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15,
193}
194
195-- The following tables are used by fixed deflate block.
196-- The value of these tables are assigned at the bottom of the source.
197
198-- The huffman code of the literal/LZ77_length deflate codes,
199-- in fixed deflate block.
200local _fix_block_literal_huffman_code
201
202-- Convert huffman code of the literal/LZ77_length to deflate codes,
203-- in fixed deflate block.
204local _fix_block_literal_huffman_to_deflate_code
205
206-- The bit length of the huffman code of literal/LZ77_length deflate codes,
207-- in fixed deflate block.
208local _fix_block_literal_huffman_bitlen
209
210-- The count of each bit length of the literal/LZ77_length deflate codes,
211-- in fixed deflate block.
212local _fix_block_literal_huffman_bitlen_count
213
214-- The huffman code of the distance deflate codes,
215-- in fixed deflate block.
216local _fix_block_dist_huffman_code
217
218-- Convert huffman code of the distance to deflate codes,
219-- in fixed deflate block.
220local _fix_block_dist_huffman_to_deflate_code
221
222-- The bit length of the huffman code of the distance deflate codes,
223-- in fixed deflate block.
224local _fix_block_dist_huffman_bitlen
225
226-- The count of each bit length of the huffman code of
227-- the distance deflate codes,
228-- in fixed deflate block.
229local _fix_block_dist_huffman_bitlen_count
230
231for i = 0, 255 do
232 _byte_to_char[i] = string_char(i)
233end
234
235do
236 local pow = 1
237 for i = 0, 32 do
238 _pow2[i] = pow
239 pow = pow * 2
240 end
241end
242
243for i = 1, 9 do
244 _reverse_bits_tbl[i] = {}
245 for j=0, _pow2[i+1]-1 do
246 local reverse = 0
247 local value = j
248 for _ = 1, i do
249 -- The following line is equivalent to "res | (code %2)" in C.
250 reverse = reverse - reverse%2
251 + (((reverse%2==1) or (value % 2) == 1) and 1 or 0)
252 value = (value-value%2)/2
253 reverse = reverse * 2
254 end
255 _reverse_bits_tbl[i][j] = (reverse-reverse%2)/2
256 end
257end
258
259-- The source code is written according to the pattern in the numbers
260-- in RFC1951 Page10.
261do
262 local a = 18
263 local b = 16
264 local c = 265
265 local bitlen = 1
266 for len = 3, 258 do
267 if len <= 10 then
268 _length_to_deflate_code[len] = len + 254
269 _length_to_deflate_extra_bitlen[len] = 0
270 elseif len == 258 then
271 _length_to_deflate_code[len] = 285
272 _length_to_deflate_extra_bitlen[len] = 0
273 else
274 if len > a then
275 a = a + b
276 b = b * 2
277 c = c + 4
278 bitlen = bitlen + 1
279 end
280 local t = len-a-1+b/2
281 _length_to_deflate_code[len] = (t-(t%(b/8)))/(b/8) + c
282 _length_to_deflate_extra_bitlen[len] = bitlen
283 _length_to_deflate_extra_bits[len] = t % (b/8)
284 end
285 end
286end
287
288-- The source code is written according to the pattern in the numbers
289-- in RFC1951 Page11.
290do
291 _dist256_to_deflate_code[1] = 0
292 _dist256_to_deflate_code[2] = 1
293 _dist256_to_deflate_extra_bitlen[1] = 0
294 _dist256_to_deflate_extra_bitlen[2] = 0
295
296 local a = 3
297 local b = 4
298 local code = 2
299 local bitlen = 0
300 for dist = 3, 256 do
301 if dist > b then
302 a = a * 2
303 b = b * 2
304 code = code + 2
305 bitlen = bitlen + 1
306 end
307 _dist256_to_deflate_code[dist] = (dist <= a) and code or (code+1)
308 _dist256_to_deflate_extra_bitlen[dist] = (bitlen < 0) and 0 or bitlen
309 if b >= 8 then
310 _dist256_to_deflate_extra_bits[dist] = (dist-b/2-1) % (b/4)
311 end
312 end
313end
314
315-- CRC-16/CRC-32 computation
316local band, bnot, xor, lshift, rshift
317
318if bit ~= nil then
319 band = bit.band
320 bnot = bit.bnot
321 xor = bit.bxor
322 lshift = bit.blshift
323 rshift = bit.blogic_rshift
324elseif bit32 ~= nil then
325 band = bit32.band
326 bnot = bit32.bnot
327 xor = bit32.bxor
328 lshift = bit32.lshift
329 rshift = bit32.rshift
330else
331 xor = function(a, b)
332 local calc = 0
333
334 for i = 32, 0, -1 do
335 local val = 2 ^ i
336 local aa = false
337 local bb = false
338
339 if a == 0 then
340 calc = calc + b
341 break
342 end
343
344 if b == 0 then
345 calc = calc + a
346 break
347 end
348
349 if a >= val then
350 aa = true
351 a = a - val
352 end
353
354 if b >= val then
355 bb = true
356 b = b - val
357 end
358
359 if not (aa and bb) and (aa or bb) then
360 calc = calc + val
361 end
362 end
363
364 return calc
365 end
366
367 lshift = function(num, left)
368 local res = num * (2 ^ left)
369 return res % (2 ^ 32)
370 end
371
372 rshift = function(num, right)
373 local res = num / (2 ^ right)
374 return math.floor(res)
375 end
376
377 band = function(a, b)
378 local p,c=1,0
379 while a>0 and b>0 do
380 local ra,rb=a%2,b%2
381 if ra+rb>1 then c=c+p end
382 a,b,p=(a-ra)/2,(b-rb)/2,p*2
383 end
384 return c
385 end
386
387 bnot = function(x)
388 return bxor(x, (2^(bits or floor(log(x, 2))))-1)
389 end
390end
391
392-- CRC-32-IEEE 802.3 (V.42)
393local POLY = 0xEDB88320
394
395-- Memoize function pattern (like http://lua-users.org/wiki/FuncTables ).
396local function memoize(f)
397 local mt = {}
398 local t = setmetatable({}, mt)
399 function mt:__index(k)
400 local v = f(k); t[k] = v
401 return v
402 end
403 return t
404end
405
406-- CRC table.
407local crc_table = memoize(function(i)
408 local crc = i
409 for _=1,8 do
410 local b = band(crc, 1)
411 crc = rshift(crc, 1)
412 if b == 1 then crc = xor(crc, POLY) end
413 end
414 return crc
415end)
416
417
418local function crc32_byte(byte, crc)
419 crc = bnot(crc or 0)
420 local v1 = rshift(crc, 8)
421 local v2 = crc_table[xor(crc % 256, byte)]
422 return bnot(xor(v1, v2))
423end
424
425
426local function crc32_string(s, crc)
427 crc = crc or 0
428 for i=1,#s do
429 crc = crc32_byte(s:byte(i), crc)
430 end
431 return crc
432end
433
434--- Calculate the CRC-32 checksum of the string.
435-- @param s [string] the input string to calculate its CRC-32 checksum.
436-- @return [integer] The CRC-32 checksum, which is greater or equal to 0,
437-- and less than 2^32 (4294967296).
438function LibDeflate:CRC32(s, crc)
439 if type(s) == 'string' then
440 return crc32_string(s, crc)
441 else
442 return crc32_byte(s, crc)
443 end
444end
445
446--- Calculate the Adler-32 checksum of the string. <br>
447-- See RFC1950 Page 9 https://tools.ietf.org/html/rfc1950 for the
448-- definition of Adler-32 checksum.
449-- @param str [string] the input string to calcuate its Adler-32 checksum.
450-- @return [integer] The Adler-32 checksum, which is greater or equal to 0,
451-- and less than 2^32 (4294967296).
452function LibDeflate:Adler32(str)
453 -- This function is loop unrolled by better performance.
454 --
455 -- Here is the minimum code:
456 --
457 -- local a = 1
458 -- local b = 0
459 -- for i=1, #str do
460 -- local s = string.byte(str, i, i)
461 -- a = (a+s)%65521
462 -- b = (b+a)%65521
463 -- end
464 -- return b*65536+a
465 if type(str) ~= "string" then
466 error(("Usage: LibDeflate:Adler32(str):"
467 .." 'str' - string expected got '%s'."):format(type(str)), 2)
468 end
469 local strlen = #str
470
471 local i = 1
472 local a = 1
473 local b = 0
474 while i <= strlen - 15 do
475 local x1, x2, x3, x4, x5, x6, x7, x8,
476 x9, x10, x11, x12, x13, x14, x15, x16 = string_byte(str, i, i+15)
477 b = (b+16*a+16*x1+15*x2+14*x3+13*x4+12*x5+11*x6+10*x7+9*x8+8*x9
478 +7*x10+6*x11+5*x12+4*x13+3*x14+2*x15+x16)%65521
479 a = (a+x1+x2+x3+x4+x5+x6+x7+x8+x9+x10+x11+x12+x13+x14+x15+x16)%65521
480 i = i + 16
481 end
482 while (i <= strlen) do
483 local x = string_byte(str, i, i)
484 a = (a + x) % 65521
485 b = (b + a) % 65521
486 i = i + 1
487 end
488 return (b*65536+a) % 4294967296
489end
490
491-- Compare adler32 checksum.
492-- adler32 should be compared with a mod to avoid sign problem
493-- 4072834167 (unsigned) is the same adler32 as -222133129
494local function IsEqualAdler32(actual, expected)
495 return (actual % 4294967296) == (expected % 4294967296)
496end
497
498--- Create a preset dictionary.
499--
500-- This function is not fast, and the memory consumption of the produced
501-- dictionary is about 50 times of the input string. Therefore, it is suggestted
502-- to run this function only once in your program.
503--
504-- It is very important to know that if you do use a preset dictionary,
505-- compressors and decompressors MUST USE THE SAME dictionary. That is,
506-- dictionary must be created using the same string. If you update your program
507-- with a new dictionary, people with the old version won't be able to transmit
508-- data with people with the new version. Therefore, changing the dictionary
509-- must be very careful.
510--
511-- The parameters "strlen" and "adler32" add a layer of verification to ensure
512-- the parameter "str" is not modified unintentionally during the program
513-- development.
514--
515-- @usage local dict_str = "1234567890"
516--
517-- -- print(dict_str:len(), LibDeflate:Adler32(dict_str))
518-- -- Hardcode the print result below to verify it to avoid acciently
519-- -- modification of 'str' during the program development.
520-- -- string length: 10, Adler-32: 187433486,
521-- -- Don't calculate string length and its Adler-32 at run-time.
522--
523-- local dict = LibDeflate:CreateDictionary(dict_str, 10, 187433486)
524--
525-- @param str [string] The string used as the preset dictionary. <br>
526-- You should put stuffs that frequently appears in the dictionary
527-- string and preferablely put more frequently appeared stuffs toward the end
528-- of the string. <br>
529-- Empty string and string longer than 32768 bytes are not allowed.
530-- @param strlen [integer] The length of 'str'. Please pass in this parameter
531-- as a hardcoded constant, in order to verify the content of 'str'. The value
532-- of this parameter should be known before your program runs.
533-- @param adler32 [integer] The Adler-32 checksum of 'str'. Please pass in this
534-- parameter as a hardcoded constant, in order to verify the content of 'str'.
535-- The value of this parameter should be known before your program runs.
536-- @return [table] The dictionary used for preset dictionary compression and
537-- decompression.
538-- @raise error if 'strlen' does not match the length of 'str',
539-- or if 'adler32' does not match the Adler-32 checksum of 'str'.
540function LibDeflate:CreateDictionary(str, strlen, adler32)
541 if type(str) ~= "string" then
542 error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
543 .." 'str' - string expected got '%s'."):format(type(str)), 2)
544 end
545 if type(strlen) ~= "number" then
546 error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
547 .." 'strlen' - number expected got '%s'."):format(
548 type(strlen)), 2)
549 end
550 if type(adler32) ~= "number" then
551 error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
552 .." 'adler32' - number expected got '%s'."):format(
553 type(adler32)), 2)
554 end
555 if strlen ~= #str then
556 error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
557 .." 'strlen' does not match the actual length of 'str'."
558 .." 'strlen': %u, '#str': %u ."
559 .." Please check if 'str' is modified unintentionally.")
560 :format(strlen, #str))
561 end
562 if strlen == 0 then
563 error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
564 .." 'str' - Empty string is not allowed."), 2)
565 end
566 if strlen > 32768 then
567 error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
568 .." 'str' - string longer than 32768 bytes is not allowed."
569 .." Got %d bytes."):format(strlen), 2)
570 end
571 local actual_adler32 = self:Adler32(str)
572 if not IsEqualAdler32(adler32, actual_adler32) then
573 error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
574 .." 'adler32' does not match the actual adler32 of 'str'."
575 .." 'adler32': %u, 'Adler32(str)': %u ."
576 .." Please check if 'str' is modified unintentionally.")
577 :format(adler32, actual_adler32))
578 end
579
580 local dictionary = {}
581 dictionary.adler32 = adler32
582 dictionary.hash_tables = {}
583 dictionary.string_table = {}
584 dictionary.strlen = strlen
585 local string_table = dictionary.string_table
586 local hash_tables = dictionary.hash_tables
587 string_table[1] = string_byte(str, 1, 1)
588 string_table[2] = string_byte(str, 2, 2)
589 if strlen >= 3 then
590 local i = 1
591 local hash = string_table[1]*256+string_table[2]
592 while i <= strlen - 2 - 3 do
593 local x1, x2, x3, x4 = string_byte(str, i+2, i+5)
594 string_table[i+2] = x1
595 string_table[i+3] = x2
596 string_table[i+4] = x3
597 string_table[i+5] = x4
598 hash = (hash*256+x1)%16777216
599 local t = hash_tables[hash]
600 if not t then t = {}; hash_tables[hash] = t end
601 t[#t+1] = i-strlen
602 i = i + 1
603 hash = (hash*256+x2)%16777216
604 t = hash_tables[hash]
605 if not t then t = {}; hash_tables[hash] = t end
606 t[#t+1] = i-strlen
607 i = i + 1
608 hash = (hash*256+x3)%16777216
609 t = hash_tables[hash]
610 if not t then t = {}; hash_tables[hash] = t end
611 t[#t+1] = i-strlen
612 i = i + 1
613 hash = (hash*256+x4)%16777216
614 t = hash_tables[hash]
615 if not t then t = {}; hash_tables[hash] = t end
616 t[#t+1] = i-strlen
617 i = i + 1
618 end
619 while i <= strlen - 2 do
620 local x = string_byte(str, i+2)
621 string_table[i+2] = x
622 hash = (hash*256+x)%16777216
623 local t = hash_tables[hash]
624 if not t then t = {}; hash_tables[hash] = t end
625 t[#t+1] = i-strlen
626 i = i + 1
627 end
628 end
629 return dictionary
630end
631
632-- Check if the dictionary is valid.
633-- @param dictionary The preset dictionary for compression and decompression.
634-- @return true if valid, false if not valid.
635-- @return if not valid, the error message.
636local function IsValidDictionary(dictionary)
637 if type(dictionary) ~= "table" then
638 return false, ("'dictionary' - table expected got '%s'.")
639 :format(type(dictionary))
640 end
641 if type(dictionary.adler32) ~= "number"
642 or type(dictionary.string_table) ~= "table"
643 or type(dictionary.strlen) ~= "number"
644 or dictionary.strlen <= 0
645 or dictionary.strlen > 32768
646 or dictionary.strlen ~= #dictionary.string_table
647 or type(dictionary.hash_tables) ~= "table"
648 then
649 return false, ("'dictionary' - corrupted dictionary.")
650 :format(type(dictionary))
651 end
652 return true, ""
653end
654
655--[[
656 key of the configuration table is the compression level,
657 and its value stores the compression setting.
658 These numbers come from zlib source code.
659
660 Higher compression level usually means better compression.
661 (Because LibDeflate uses a simplified version of zlib algorithm,
662 there is no guarantee that higher compression level does not create
663 bigger file than lower level, but I can say it's 99% likely)
664
665 Be careful with the high compression level. This is a pure lua
666 implementation compressor/decompressor, which is significant slower than
667 a C/C++ equivalant compressor/decompressor. Very high compression level
668 costs significant more CPU time, and usually compression size won't be
669 significant smaller when you increase compression level by 1, when the
670 level is already very high. Benchmark yourself if you can afford it.
671
672 See also https://github.com/madler/zlib/blob/master/doc/algorithm.txt,
673 https://github.com/madler/zlib/blob/master/deflate.c for more information.
674
675 The meaning of each field:
676 @field 1 use_lazy_evaluation:
677 true/false. Whether the program uses lazy evaluation.
678 See what is "lazy evaluation" in the link above.
679 lazy_evaluation improves ratio, but relatively slow.
680 @field 2 good_prev_length:
681 Only effective if lazy is set, Only use 1/4 of max_chain,
682 if prev length of lazy match is above this.
683 @field 3 max_insert_length/max_lazy_match:
684 If not using lazy evaluation,
685 insert new strings in the hash table only if the match length is not
686 greater than this length.
687 If using lazy evaluation, only continue lazy evaluation,
688 if previous match length is strictly smaller than this value.
689 @field 4 nice_length:
690 Number. Don't continue to go down the hash chain,
691 if match length is above this.
692 @field 5 max_chain:
693 Number. The maximum number of hash chains we look.
694--]]
695local _compression_level_configs = {
696 [0] = {false, nil, 0, 0, 0}, -- level 0, no compression
697 [1] = {false, nil, 4, 8, 4}, -- level 1, similar to zlib level 1
698 [2] = {false, nil, 5, 18, 8}, -- level 2, similar to zlib level 2
699 [3] = {false, nil, 6, 32, 32}, -- level 3, similar to zlib level 3
700 [4] = {true, 4, 4, 16, 16}, -- level 4, similar to zlib level 4
701 [5] = {true, 8, 16, 32, 32}, -- level 5, similar to zlib level 5
702 [6] = {true, 8, 16, 128, 128}, -- level 6, similar to zlib level 6
703 [7] = {true, 8, 32, 128, 256}, -- (SLOW) level 7, similar to zlib level 7
704 [8] = {true, 32, 128, 258, 1024} , --(SLOW) level 8,similar to zlib level 8
705 [9] = {true, 32, 258, 258, 4096},
706 -- (VERY SLOW) level 9, similar to zlib level 9
707}
708
709-- Check if the compression/decompression arguments is valid
710-- @param str The input string.
711-- @param check_dictionary if true, check if dictionary is valid.
712-- @param dictionary The preset dictionary for compression and decompression.
713-- @param check_configs if true, check if config is valid.
714-- @param configs The compression configuration table
715-- @return true if valid, false if not valid.
716-- @return if not valid, the error message.
717local function IsValidArguments(str,
718 check_dictionary, dictionary,
719 check_configs, configs)
720
721 if type(str) ~= "string" then
722 return false,
723 ("'str' - string expected got '%s'."):format(type(str))
724 end
725 if check_dictionary then
726 local dict_valid, dict_err = IsValidDictionary(dictionary)
727 if not dict_valid then
728 return false, dict_err
729 end
730 end
731 if check_configs then
732 local type_configs = type(configs)
733 if type_configs ~= "nil" and type_configs ~= "table" then
734 return false,
735 ("'configs' - nil or table expected got '%s'.")
736 :format(type(configs))
737 end
738 if type_configs == "table" then
739 for k, v in pairs(configs) do
740 if k ~= "level" and k ~= "strategy" then
741 return false,
742 ("'configs' - unsupported table key in the configs: '%s'.")
743 :format(k)
744 elseif k == "level" and not _compression_level_configs[v] then
745 return false,
746 ("'configs' - unsupported 'level': %s."):format(tostring(v))
747 elseif k == "strategy" and v ~= "fixed" and v ~= "huffman_only"
748 and v ~= "dynamic" then
749 -- random_block_type is for testing purpose
750 return false, ("'configs' - unsupported 'strategy': '%s'.")
751 :format(tostring(v))
752 end
753 end
754 end
755 end
756 return true, ""
757end
758
759
760
761--[[ --------------------------------------------------------------------------
762 Compress code
763--]] --------------------------------------------------------------------------
764
765-- partial flush to save memory
766local _FLUSH_MODE_MEMORY_CLEANUP = 0
767-- full flush with partial bytes
768local _FLUSH_MODE_OUTPUT = 1
769-- write bytes to get to byte boundary
770local _FLUSH_MODE_BYTE_BOUNDARY = 2
771-- no flush, just get num of bits written so far
772local _FLUSH_MODE_NO_FLUSH = 3
773
774--[[
775 Create an empty writer to easily write stuffs as the unit of bits.
776 Return values:
777 1. WriteBits(code, bitlen):
778 2. WriteString(str):
779 3. Flush(mode):
780--]]
781local function CreateWriter()
782 local buffer_size = 0
783 local cache = 0
784 local cache_bitlen = 0
785 local total_bitlen = 0
786 local buffer = {}
787 -- When buffer is big enough, flush into result_buffer to save memory.
788 local result_buffer = {}
789
790 -- Write bits with value "value" and bit length of "bitlen" into writer.
791 -- @param value: The value being written
792 -- @param bitlen: The bit length of "value"
793 -- @return nil
794 local function WriteBits(value, bitlen)
795 cache = cache + value * _pow2[cache_bitlen]
796 cache_bitlen = cache_bitlen + bitlen
797 total_bitlen = total_bitlen + bitlen
798 -- Only bulk to buffer every 4 bytes. This is quicker.
799 if cache_bitlen >= 32 then
800 buffer_size = buffer_size + 1
801 buffer[buffer_size] =
802 _byte_to_char[cache % 256]
803 .._byte_to_char[((cache-cache%256)/256 % 256)]
804 .._byte_to_char[((cache-cache%65536)/65536 % 256)]
805 .._byte_to_char[((cache-cache%16777216)/16777216 % 256)]
806 local rshift_mask = _pow2[32 - cache_bitlen + bitlen]
807 cache = (value - value%rshift_mask)/rshift_mask
808 cache_bitlen = cache_bitlen - 32
809 end
810 end
811
812 -- Write the entire string into the writer.
813 -- @param str The string being written
814 -- @return nil
815 local function WriteString(str)
816 for _ = 1, cache_bitlen, 8 do
817 buffer_size = buffer_size + 1
818 buffer[buffer_size] = string_char(cache % 256)
819 cache = (cache-cache%256)/256
820 end
821 cache_bitlen = 0
822 buffer_size = buffer_size + 1
823 buffer[buffer_size] = str
824 total_bitlen = total_bitlen + #str*8
825 end
826
827 -- Flush current stuffs in the writer and return it.
828 -- This operation will free most of the memory.
829 -- @param mode See the descrtion of the constant and the source code.
830 -- @return The total number of bits stored in the writer right now.
831 -- for byte boundary mode, it includes the padding bits.
832 -- for output mode, it does not include padding bits.
833 -- @return Return the outputs if mode is output.
834 local function FlushWriter(mode)
835 if mode == _FLUSH_MODE_NO_FLUSH then
836 return total_bitlen
837 end
838
839 if mode == _FLUSH_MODE_OUTPUT
840 or mode == _FLUSH_MODE_BYTE_BOUNDARY then
841 -- Full flush, also output cache.
842 -- Need to pad some bits if cache_bitlen is not multiple of 8.
843 local padding_bitlen = (8 - cache_bitlen % 8) % 8
844
845 if cache_bitlen > 0 then
846 -- padding with all 1 bits, mainly because "\000" is not
847 -- good to be tranmitted. I do this so "\000" is a little bit
848 -- less frequent.
849 cache = cache - _pow2[cache_bitlen]
850 + _pow2[cache_bitlen+padding_bitlen]
851 for _ = 1, cache_bitlen, 8 do
852 buffer_size = buffer_size + 1
853 buffer[buffer_size] = _byte_to_char[cache % 256]
854 cache = (cache-cache%256)/256
855 end
856
857 cache = 0
858 cache_bitlen = 0
859 end
860 if mode == _FLUSH_MODE_BYTE_BOUNDARY then
861 total_bitlen = total_bitlen + padding_bitlen
862 return total_bitlen
863 end
864 end
865
866 local flushed = table_concat(buffer)
867 buffer = {}
868 buffer_size = 0
869 result_buffer[#result_buffer+1] = flushed
870
871 if mode == _FLUSH_MODE_MEMORY_CLEANUP then
872 return total_bitlen
873 else
874 return total_bitlen, table_concat(result_buffer)
875 end
876 end
877
878 return WriteBits, WriteString, FlushWriter
879end
880
881-- Push an element into a max heap
882-- @param heap A max heap whose max element is at index 1.
883-- @param e The element to be pushed. Assume element "e" is a table
884-- and comparison is done via its first entry e[1]
885-- @param heap_size current number of elements in the heap.
886-- NOTE: There may be some garbage stored in
887-- heap[heap_size+1], heap[heap_size+2], etc..
888-- @return nil
889local function MinHeapPush(heap, e, heap_size)
890 heap_size = heap_size + 1
891 heap[heap_size] = e
892 local value = e[1]
893 local pos = heap_size
894 local parent_pos = (pos-pos%2)/2
895
896 while (parent_pos >= 1 and heap[parent_pos][1] > value) do
897 local t = heap[parent_pos]
898 heap[parent_pos] = e
899 heap[pos] = t
900 pos = parent_pos
901 parent_pos = (parent_pos-parent_pos%2)/2
902 end
903end
904
905-- Pop an element from a max heap
906-- @param heap A max heap whose max element is at index 1.
907-- @param heap_size current number of elements in the heap.
908-- @return the poped element
909-- Note: This function does not change table size of "heap" to save CPU time.
910local function MinHeapPop(heap, heap_size)
911 local top = heap[1]
912 local e = heap[heap_size]
913 local value = e[1]
914 heap[1] = e
915 heap[heap_size] = top
916 heap_size = heap_size - 1
917
918 local pos = 1
919 local left_child_pos = pos * 2
920 local right_child_pos = left_child_pos + 1
921
922 while (left_child_pos <= heap_size) do
923 local left_child = heap[left_child_pos]
924 if (right_child_pos <= heap_size
925 and heap[right_child_pos][1] < left_child[1]) then
926 local right_child = heap[right_child_pos]
927 if right_child[1] < value then
928 heap[right_child_pos] = e
929 heap[pos] = right_child
930 pos = right_child_pos
931 left_child_pos = pos * 2
932 right_child_pos = left_child_pos + 1
933 else
934 break
935 end
936 else
937 if left_child[1] < value then
938 heap[left_child_pos] = e
939 heap[pos] = left_child
940 pos = left_child_pos
941 left_child_pos = pos * 2
942 right_child_pos = left_child_pos + 1
943 else
944 break
945 end
946 end
947 end
948
949 return top
950end
951
952-- Deflate defines a special huffman tree, which is unique once the bit length
953-- of huffman code of all symbols are known.
954-- @param bitlen_count Number of symbols with a specific bitlen
955-- @param symbol_bitlen The bit length of a symbol
956-- @param max_symbol The max symbol among all symbols,
957-- which is (number of symbols - 1)
958-- @param max_bitlen The max huffman bit length among all symbols.
959-- @return The huffman code of all symbols.
960local function GetHuffmanCodeFromBitlen(bitlen_counts, symbol_bitlens
961 , max_symbol, max_bitlen)
962 local huffman_code = 0
963 local next_codes = {}
964 local symbol_huffman_codes = {}
965 for bitlen = 1, max_bitlen do
966 huffman_code = (huffman_code+(bitlen_counts[bitlen-1] or 0))*2
967 next_codes[bitlen] = huffman_code
968 end
969 for symbol = 0, max_symbol do
970 local bitlen = symbol_bitlens[symbol]
971 if bitlen then
972 huffman_code = next_codes[bitlen]
973 next_codes[bitlen] = huffman_code + 1
974
975 -- Reverse the bits of huffman code,
976 -- because most signifant bits of huffman code
977 -- is stored first into the compressed data.
978 -- @see RFC1951 Page5 Section 3.1.1
979 if bitlen <= 9 then -- Have cached reverse for small bitlen.
980 symbol_huffman_codes[symbol] =
981 _reverse_bits_tbl[bitlen][huffman_code]
982 else
983 local reverse = 0
984 for _ = 1, bitlen do
985 reverse = reverse - reverse%2
986 + (((reverse%2==1)
987 or (huffman_code % 2) == 1) and 1 or 0)
988 huffman_code = (huffman_code-huffman_code%2)/2
989 reverse = reverse*2
990 end
991 symbol_huffman_codes[symbol] = (reverse-reverse%2)/2
992 end
993 end
994 end
995 return symbol_huffman_codes
996end
997
998-- A helper function to sort heap elements
999-- a[1], b[1] is the huffman frequency
1000-- a[2], b[2] is the symbol value.
1001local function SortByFirstThenSecond(a, b)
1002 return a[1] < b[1] or
1003 (a[1] == b[1] and a[2] < b[2])
1004end
1005
1006-- Calculate the huffman bit length and huffman code.
1007-- @param symbol_count: A table whose table key is the symbol, and table value
1008-- is the symbol frenquency (nil means 0 frequency).
1009-- @param max_bitlen: See description of return value.
1010-- @param max_symbol: The maximum symbol
1011-- @return a table whose key is the symbol, and the value is the huffman bit
1012-- bit length. We guarantee that all bit length <= max_bitlen.
1013-- For 0<=symbol<=max_symbol, table value could be nil if the frequency
1014-- of the symbol is 0 or nil.
1015-- @return a table whose key is the symbol, and the value is the huffman code.
1016-- @return a number indicating the maximum symbol whose bitlen is not 0.
1017local function GetHuffmanBitlenAndCode(symbol_counts, max_bitlen, max_symbol)
1018 local heap_size
1019 local max_non_zero_bitlen_symbol = -1
1020 local leafs = {}
1021 local heap = {}
1022 local symbol_bitlens = {}
1023 local symbol_codes = {}
1024 local bitlen_counts = {}
1025
1026 --[[
1027 tree[1]: weight, temporarily used as parent and bitLengths
1028 tree[2]: symbol
1029 tree[3]: left child
1030 tree[4]: right child
1031 --]]
1032 local number_unique_symbols = 0
1033 for symbol, count in pairs(symbol_counts) do
1034 number_unique_symbols = number_unique_symbols + 1
1035 leafs[number_unique_symbols] = {count, symbol}
1036 end
1037
1038 if (number_unique_symbols == 0) then
1039 -- no code.
1040 return {}, {}, -1
1041 elseif (number_unique_symbols == 1) then
1042 -- Only one code. In this case, its huffman code
1043 -- needs to be assigned as 0, and bit length is 1.
1044 -- This is the only case that the return result
1045 -- represents an imcomplete huffman tree.
1046 local symbol = leafs[1][2]
1047 symbol_bitlens[symbol] = 1
1048 symbol_codes[symbol] = 0
1049 return symbol_bitlens, symbol_codes, symbol
1050 else
1051 table_sort(leafs, SortByFirstThenSecond)
1052 heap_size = number_unique_symbols
1053 for i = 1, heap_size do
1054 heap[i] = leafs[i]
1055 end
1056
1057 while (heap_size > 1) do
1058 -- Note: pop does not change table size of heap
1059 local leftChild = MinHeapPop(heap, heap_size)
1060 heap_size = heap_size - 1
1061 local rightChild = MinHeapPop(heap, heap_size)
1062 heap_size = heap_size - 1
1063 local newNode =
1064 {leftChild[1]+rightChild[1], -1, leftChild, rightChild}
1065 MinHeapPush(heap, newNode, heap_size)
1066 heap_size = heap_size + 1
1067 end
1068
1069 -- Number of leafs whose bit length is greater than max_len.
1070 local number_bitlen_overflow = 0
1071
1072 -- Calculate bit length of all nodes
1073 local fifo = {heap[1], 0, 0, 0} -- preallocate some spaces.
1074 local fifo_size = 1
1075 local index = 1
1076 heap[1][1] = 0
1077 while (index <= fifo_size) do -- Breath first search
1078 local e = fifo[index]
1079 local bitlen = e[1]
1080 local symbol = e[2]
1081 local left_child = e[3]
1082 local right_child = e[4]
1083 if left_child then
1084 fifo_size = fifo_size + 1
1085 fifo[fifo_size] = left_child
1086 left_child[1] = bitlen + 1
1087 end
1088 if right_child then
1089 fifo_size = fifo_size + 1
1090 fifo[fifo_size] = right_child
1091 right_child[1] = bitlen + 1
1092 end
1093 index = index + 1
1094
1095 if (bitlen > max_bitlen) then
1096 number_bitlen_overflow = number_bitlen_overflow + 1
1097 bitlen = max_bitlen
1098 end
1099 if symbol >= 0 then
1100 symbol_bitlens[symbol] = bitlen
1101 max_non_zero_bitlen_symbol =
1102 (symbol > max_non_zero_bitlen_symbol)
1103 and symbol or max_non_zero_bitlen_symbol
1104 bitlen_counts[bitlen] = (bitlen_counts[bitlen] or 0) + 1
1105 end
1106 end
1107
1108 -- Resolve bit length overflow
1109 -- @see ZLib/trees.c:gen_bitlen(s, desc), for reference
1110 if (number_bitlen_overflow > 0) then
1111 repeat
1112 local bitlen = max_bitlen - 1
1113 while ((bitlen_counts[bitlen] or 0) == 0) do
1114 bitlen = bitlen - 1
1115 end
1116 -- move one leaf down the tree
1117 bitlen_counts[bitlen] = bitlen_counts[bitlen] - 1
1118 -- move one overflow item as its brother
1119 bitlen_counts[bitlen+1] = (bitlen_counts[bitlen+1] or 0) + 2
1120 bitlen_counts[max_bitlen] = bitlen_counts[max_bitlen] - 1
1121 number_bitlen_overflow = number_bitlen_overflow - 2
1122 until (number_bitlen_overflow <= 0)
1123
1124 index = 1
1125 for bitlen = max_bitlen, 1, -1 do
1126 local n = bitlen_counts[bitlen] or 0
1127 while (n > 0) do
1128 local symbol = leafs[index][2]
1129 symbol_bitlens[symbol] = bitlen
1130 n = n - 1
1131 index = index + 1
1132 end
1133 end
1134 end
1135
1136 symbol_codes = GetHuffmanCodeFromBitlen(bitlen_counts, symbol_bitlens,
1137 max_symbol, max_bitlen)
1138 return symbol_bitlens, symbol_codes, max_non_zero_bitlen_symbol
1139 end
1140end
1141
1142-- Calculate the first huffman header in the dynamic huffman block
1143-- @see RFC1951 Page 12
1144-- @param lcode_bitlen: The huffman bit length of literal/LZ77_length.
1145-- @param max_non_zero_bitlen_lcode: The maximum literal/LZ77_length symbol
1146-- whose huffman bit length is not zero.
1147-- @param dcode_bitlen: The huffman bit length of LZ77 distance.
1148-- @param max_non_zero_bitlen_dcode: The maximum LZ77 distance symbol
1149-- whose huffman bit length is not zero.
1150-- @return The run length encoded codes.
1151-- @return The extra bits. One entry for each rle code that needs extra bits.
1152-- (code == 16 or 17 or 18).
1153-- @return The count of appearance of each rle codes.
1154local function RunLengthEncodeHuffmanBitlen(
1155 lcode_bitlens,
1156 max_non_zero_bitlen_lcode,
1157 dcode_bitlens,
1158 max_non_zero_bitlen_dcode)
1159 local rle_code_tblsize = 0
1160 local rle_codes = {}
1161 local rle_code_counts = {}
1162 local rle_extra_bits_tblsize = 0
1163 local rle_extra_bits = {}
1164 local prev = nil
1165 local count = 0
1166
1167 -- If there is no distance code, assume one distance code of bit length 0.
1168 -- RFC1951: One distance code of zero bits means that
1169 -- there are no distance codes used at all (the data is all literals).
1170 max_non_zero_bitlen_dcode = (max_non_zero_bitlen_dcode < 0)
1171 and 0 or max_non_zero_bitlen_dcode
1172 local max_code = max_non_zero_bitlen_lcode+max_non_zero_bitlen_dcode+1
1173
1174 for code = 0, max_code+1 do
1175 local len = (code <= max_non_zero_bitlen_lcode)
1176 and (lcode_bitlens[code] or 0)
1177 or ((code <= max_code)
1178 and (dcode_bitlens[code-max_non_zero_bitlen_lcode-1] or 0) or nil)
1179 if len == prev then
1180 count = count + 1
1181 if len ~= 0 and count == 6 then
1182 rle_code_tblsize = rle_code_tblsize + 1
1183 rle_codes[rle_code_tblsize] = 16
1184 rle_extra_bits_tblsize = rle_extra_bits_tblsize + 1
1185 rle_extra_bits[rle_extra_bits_tblsize] = 3
1186 rle_code_counts[16] = (rle_code_counts[16] or 0) + 1
1187 count = 0
1188 elseif len == 0 and count == 138 then
1189 rle_code_tblsize = rle_code_tblsize + 1
1190 rle_codes[rle_code_tblsize] = 18
1191 rle_extra_bits_tblsize = rle_extra_bits_tblsize + 1
1192 rle_extra_bits[rle_extra_bits_tblsize] = 127
1193 rle_code_counts[18] = (rle_code_counts[18] or 0) + 1
1194 count = 0
1195 end
1196 else
1197 if count == 1 then
1198 rle_code_tblsize = rle_code_tblsize + 1
1199 rle_codes[rle_code_tblsize] = prev
1200 rle_code_counts[prev] = (rle_code_counts[prev] or 0) + 1
1201 elseif count == 2 then
1202 rle_code_tblsize = rle_code_tblsize + 1
1203 rle_codes[rle_code_tblsize] = prev
1204 rle_code_tblsize = rle_code_tblsize + 1
1205 rle_codes[rle_code_tblsize] = prev
1206 rle_code_counts[prev] = (rle_code_counts[prev] or 0) + 2
1207 elseif count >= 3 then
1208 rle_code_tblsize = rle_code_tblsize + 1
1209 local rleCode = (prev ~= 0) and 16 or (count <= 10 and 17 or 18)
1210 rle_codes[rle_code_tblsize] = rleCode
1211 rle_code_counts[rleCode] = (rle_code_counts[rleCode] or 0) + 1
1212 rle_extra_bits_tblsize = rle_extra_bits_tblsize + 1
1213 rle_extra_bits[rle_extra_bits_tblsize] =
1214 (count <= 10) and (count - 3) or (count - 11)
1215 end
1216
1217 prev = len
1218 if len and len ~= 0 then
1219 rle_code_tblsize = rle_code_tblsize + 1
1220 rle_codes[rle_code_tblsize] = len
1221 rle_code_counts[len] = (rle_code_counts[len] or 0) + 1
1222 count = 0
1223 else
1224 count = 1
1225 end
1226 end
1227 end
1228
1229 return rle_codes, rle_extra_bits, rle_code_counts
1230end
1231
1232-- Load the string into a table, in order to speed up LZ77.
1233-- Loop unrolled 16 times to speed this function up.
1234-- @param str The string to be loaded.
1235-- @param t The load destination
1236-- @param start str[index] will be the first character to be loaded.
1237-- @param end str[index] will be the last character to be loaded
1238-- @param offset str[index] will be loaded into t[index-offset]
1239-- @return t
1240local function LoadStringToTable(str, t, start, stop, offset)
1241 local i = start - offset
1242 while i <= stop - 15 - offset do
1243 t[i], t[i+1], t[i+2], t[i+3], t[i+4], t[i+5], t[i+6], t[i+7], t[i+8],
1244 t[i+9], t[i+10], t[i+11], t[i+12], t[i+13], t[i+14], t[i+15] =
1245 string_byte(str, i + offset, i + 15 + offset)
1246 i = i + 16
1247 end
1248 while (i <= stop - offset) do
1249 t[i] = string_byte(str, i + offset, i + offset)
1250 i = i + 1
1251 end
1252 return t
1253end
1254
1255-- Do LZ77 process. This function uses the majority of the CPU time.
1256-- @see zlib/deflate.c:deflate_fast(), zlib/deflate.c:deflate_slow()
1257-- @see https://github.com/madler/zlib/blob/master/doc/algorithm.txt
1258-- This function uses the algorithms used above. You should read the
1259-- algorithm.txt above to understand what is the hash function and the
1260-- lazy evaluation.
1261--
1262-- The special optimization used here is hash functions used here.
1263-- The hash function is just the multiplication of the three consective
1264-- characters. So if the hash matches, it guarantees 3 characters are matched.
1265-- This optimization can be implemented because Lua table is a hash table.
1266--
1267-- @param level integer that describes compression level.
1268-- @param string_table table that stores the value of string to be compressed.
1269-- The index of this table starts from 1.
1270-- The caller needs to make sure all values needed by this function
1271-- are loaded.
1272-- Assume "str" is the origin input string into the compressor
1273-- str[block_start]..str[block_end+3] needs to be loaded into
1274-- string_table[block_start-offset]..string_table[block_end-offset]
1275-- If dictionary is presented, the last 258 bytes of the dictionary
1276-- needs to be loaded into sing_table[-257..0]
1277-- (See more in the description of offset.)
1278-- @param hash_tables. The table key is the hash value (0<=hash<=16777216=256^3)
1279-- The table value is an array0 that stores the indexes of the
1280-- input data string to be compressed, such that
1281-- hash == str[index]*str[index+1]*str[index+2]
1282-- Indexes are ordered in this array.
1283-- @param block_start The indexes of the input data string to be compressed.
1284-- that starts the LZ77 block.
1285-- @param block_end The indexes of the input data string to be compressed.
1286-- that stores the LZ77 block.
1287-- @param offset str[index] is stored in string_table[index-offset],
1288-- This offset is mainly an optimization to limit the index
1289-- of string_table, so lua can access this table quicker.
1290-- @param dictionary See LibDeflate:CreateDictionary
1291-- @return literal/LZ77_length deflate codes.
1292-- @return the extra bits of literal/LZ77_length deflate codes.
1293-- @return the count of each literal/LZ77 deflate code.
1294-- @return LZ77 distance deflate codes.
1295-- @return the extra bits of LZ77 distance deflate codes.
1296-- @return the count of each LZ77 distance deflate code.
1297local function GetBlockLZ77Result(level, string_table, hash_tables, block_start,
1298 block_end, offset, dictionary)
1299 local config = _compression_level_configs[level]
1300 local config_use_lazy
1301 , config_good_prev_length
1302 , config_max_lazy_match
1303 , config_nice_length
1304 , config_max_hash_chain =
1305 config[1], config[2], config[3], config[4], config[5]
1306
1307 local config_max_insert_length = (not config_use_lazy)
1308 and config_max_lazy_match or 2147483646
1309 local config_good_hash_chain =
1310 (config_max_hash_chain-config_max_hash_chain%4/4)
1311
1312 local hash
1313
1314 local dict_hash_tables
1315 local dict_string_table
1316 local dict_string_len = 0
1317
1318 if dictionary then
1319 dict_hash_tables = dictionary.hash_tables
1320 dict_string_table = dictionary.string_table
1321 dict_string_len = dictionary.strlen
1322 assert(block_start == 1)
1323 if block_end >= block_start and dict_string_len >= 2 then
1324 hash = dict_string_table[dict_string_len-1]*65536
1325 + dict_string_table[dict_string_len]*256 + string_table[1]
1326 local t = hash_tables[hash]
1327 if not t then t = {}; hash_tables[hash] = t end
1328 t[#t+1] = -1
1329 end
1330 if block_end >= block_start+1 and dict_string_len >= 1 then
1331 hash = dict_string_table[dict_string_len]*65536
1332 + string_table[1]*256 + string_table[2]
1333 local t = hash_tables[hash]
1334 if not t then t = {}; hash_tables[hash] = t end
1335 t[#t+1] = 0
1336 end
1337 end
1338
1339 hash = (string_table[block_start-offset] or 0)*256
1340 + (string_table[block_start+1-offset] or 0)
1341
1342 local lcodes = {}
1343 local lcode_tblsize = 0
1344 local lcodes_counts = {}
1345 local dcodes = {}
1346 local dcodes_tblsize = 0
1347 local dcodes_counts = {}
1348
1349 local lextra_bits = {}
1350 local lextra_bits_tblsize = 0
1351 local dextra_bits = {}
1352 local dextra_bits_tblsize = 0
1353
1354 local match_available = false
1355 local prev_len
1356 local prev_dist
1357 local cur_len = 0
1358 local cur_dist = 0
1359
1360 local index = block_start
1361 local index_end = block_end + (config_use_lazy and 1 or 0)
1362
1363 -- the zlib source code writes separate code for lazy evaluation and
1364 -- not lazy evaluation, which is easier to understand.
1365 -- I put them together, so it is a bit harder to understand.
1366 -- because I think this is easier for me to maintain it.
1367 while (index <= index_end) do
1368 local string_table_index = index - offset
1369 prev_len = cur_len
1370 prev_dist = cur_dist
1371 cur_len = 0
1372
1373 hash = (hash*256+(string_table[string_table_index+2] or 0))%16777216
1374
1375 local chain_index
1376 local cur_chain
1377 local hash_chain = hash_tables[hash]
1378 local chain_old_size
1379 if not hash_chain then
1380 chain_old_size = 0
1381 hash_chain = {}
1382 hash_tables[hash] = hash_chain
1383 if dict_hash_tables then
1384 cur_chain = dict_hash_tables[hash]
1385 chain_index = cur_chain and #cur_chain or 0
1386 else
1387 chain_index = 0
1388 end
1389 else
1390 chain_old_size = #hash_chain
1391 cur_chain = hash_chain
1392 chain_index = chain_old_size
1393 end
1394
1395 if index <= block_end then
1396 hash_chain[chain_old_size+1] = index
1397 end
1398
1399 if (chain_index > 0 and index + 2 <= block_end
1400 and (not config_use_lazy or prev_len < config_max_lazy_match)) then
1401
1402 local depth =
1403 (config_use_lazy and prev_len >= config_good_prev_length)
1404 and config_good_hash_chain or config_max_hash_chain
1405
1406 while chain_index >= 1 and depth > 0 do
1407 local prev = cur_chain[chain_index]
1408
1409 if index - prev > 32768 then
1410 break
1411 end
1412 if prev < index then
1413 local j = 3
1414
1415 if prev >= -257 then
1416 local prev_table_index = prev-offset
1417 -- NOTE for author:
1418 -- j < 258 and index + j <= block_end
1419 -- This is the right condition
1420 while (j < 258 and index + j <= block_end) do
1421 if (string_table[prev_table_index+j]
1422 == string_table[string_table_index+j]) then
1423 j = j + 1
1424 else
1425 break
1426 end
1427 end
1428 else
1429 local prev_table_index = dict_string_len+prev
1430 -- NOTE for author:
1431 -- j < 258 and index + j <= block_end
1432 -- This is the right condition
1433 while (j < 258 and index + j <= block_end) do
1434 if (dict_string_table[prev_table_index+j]
1435 == string_table[string_table_index+j]) then
1436 j = j + 1
1437 else
1438 break
1439 end
1440 end
1441 end
1442 if j > cur_len then
1443 cur_len = j
1444 cur_dist = index - prev
1445 end
1446 if cur_len >= config_nice_length then
1447 break
1448 end
1449 end
1450
1451 chain_index = chain_index - 1
1452 depth = depth - 1
1453 if chain_index == 0 and prev > 0 and dict_hash_tables then
1454 cur_chain = dict_hash_tables[hash]
1455 chain_index = cur_chain and #cur_chain or 0
1456 end
1457 end
1458 end
1459
1460 if not config_use_lazy then
1461 prev_len, prev_dist = cur_len, cur_dist
1462 end
1463 if ((not config_use_lazy or match_available)
1464 and (prev_len > 3 or (prev_len == 3 and prev_dist < 4096))
1465 and cur_len <= prev_len )then
1466 local code = _length_to_deflate_code[prev_len]
1467 local length_extra_bits_bitlen =
1468 _length_to_deflate_extra_bitlen[prev_len]
1469 local dist_code, dist_extra_bits_bitlen, dist_extra_bits
1470 if prev_dist <= 256 then -- have cached code for small distance.
1471 dist_code = _dist256_to_deflate_code[prev_dist]
1472 dist_extra_bits = _dist256_to_deflate_extra_bits[prev_dist]
1473 dist_extra_bits_bitlen =
1474 _dist256_to_deflate_extra_bitlen[prev_dist]
1475 else
1476 dist_code = 16
1477 dist_extra_bits_bitlen = 7
1478 local a = 384
1479 local b = 512
1480
1481 while true do
1482 if prev_dist <= a then
1483 dist_extra_bits = (prev_dist-(b/2)-1) % (b/4)
1484 break
1485 elseif prev_dist <= b then
1486 dist_extra_bits = (prev_dist-(b/2)-1) % (b/4)
1487 dist_code = dist_code + 1
1488 break
1489 else
1490 dist_code = dist_code + 2
1491 dist_extra_bits_bitlen = dist_extra_bits_bitlen + 1
1492 a = a*2
1493 b = b*2
1494 end
1495 end
1496 end
1497 lcode_tblsize = lcode_tblsize + 1
1498 lcodes[lcode_tblsize] = code
1499 lcodes_counts[code] = (lcodes_counts[code] or 0) + 1
1500
1501 dcodes_tblsize = dcodes_tblsize + 1
1502 dcodes[dcodes_tblsize] = dist_code
1503 dcodes_counts[dist_code] = (dcodes_counts[dist_code] or 0) + 1
1504
1505 if length_extra_bits_bitlen > 0 then
1506 local lenExtraBits = _length_to_deflate_extra_bits[prev_len]
1507 lextra_bits_tblsize = lextra_bits_tblsize + 1
1508 lextra_bits[lextra_bits_tblsize] = lenExtraBits
1509 end
1510 if dist_extra_bits_bitlen > 0 then
1511 dextra_bits_tblsize = dextra_bits_tblsize + 1
1512 dextra_bits[dextra_bits_tblsize] = dist_extra_bits
1513 end
1514
1515 for i=index+1, index+prev_len-(config_use_lazy and 2 or 1) do
1516 hash = (hash*256+(string_table[i-offset+2] or 0))%16777216
1517 if prev_len <= config_max_insert_length then
1518 hash_chain = hash_tables[hash]
1519 if not hash_chain then
1520 hash_chain = {}
1521 hash_tables[hash] = hash_chain
1522 end
1523 hash_chain[#hash_chain+1] = i
1524 end
1525 end
1526 index = index + prev_len - (config_use_lazy and 1 or 0)
1527 match_available = false
1528 elseif (not config_use_lazy) or match_available then
1529 local code = string_table[config_use_lazy
1530 and (string_table_index-1) or string_table_index]
1531 lcode_tblsize = lcode_tblsize + 1
1532 lcodes[lcode_tblsize] = code
1533 lcodes_counts[code] = (lcodes_counts[code] or 0) + 1
1534 index = index + 1
1535 else
1536 match_available = true
1537 index = index + 1
1538 end
1539 end
1540
1541 -- Write "end of block" symbol
1542 lcode_tblsize = lcode_tblsize + 1
1543 lcodes[lcode_tblsize] = 256
1544 lcodes_counts[256] = (lcodes_counts[256] or 0) + 1
1545
1546 return lcodes, lextra_bits, lcodes_counts, dcodes, dextra_bits
1547 , dcodes_counts
1548end
1549
1550-- Get the header data of dynamic block.
1551-- @param lcodes_count The count of each literal/LZ77_length codes.
1552-- @param dcodes_count The count of each Lz77 distance codes.
1553-- @return a lots of stuffs.
1554-- @see RFC1951 Page 12
1555local function GetBlockDynamicHuffmanHeader(lcodes_counts, dcodes_counts)
1556 local lcodes_huffman_bitlens, lcodes_huffman_codes
1557 , max_non_zero_bitlen_lcode =
1558 GetHuffmanBitlenAndCode(lcodes_counts, 15, 285)
1559 local dcodes_huffman_bitlens, dcodes_huffman_codes
1560 , max_non_zero_bitlen_dcode =
1561 GetHuffmanBitlenAndCode(dcodes_counts, 15, 29)
1562
1563 local rle_deflate_codes, rle_extra_bits, rle_codes_counts =
1564 RunLengthEncodeHuffmanBitlen(lcodes_huffman_bitlens
1565 ,max_non_zero_bitlen_lcode, dcodes_huffman_bitlens
1566 , max_non_zero_bitlen_dcode)
1567
1568 local rle_codes_huffman_bitlens, rle_codes_huffman_codes =
1569 GetHuffmanBitlenAndCode(rle_codes_counts, 7, 18)
1570
1571 local HCLEN = 0
1572 for i = 1, 19 do
1573 local symbol = _rle_codes_huffman_bitlen_order[i]
1574 local length = rle_codes_huffman_bitlens[symbol] or 0
1575 if length ~= 0 then
1576 HCLEN = i
1577 end
1578 end
1579
1580 HCLEN = HCLEN - 4
1581 local HLIT = max_non_zero_bitlen_lcode + 1 - 257
1582 local HDIST = max_non_zero_bitlen_dcode + 1 - 1
1583 if HDIST < 0 then HDIST = 0 end
1584
1585 return HLIT, HDIST, HCLEN, rle_codes_huffman_bitlens
1586 , rle_codes_huffman_codes, rle_deflate_codes, rle_extra_bits
1587 , lcodes_huffman_bitlens, lcodes_huffman_codes
1588 , dcodes_huffman_bitlens, dcodes_huffman_codes
1589end
1590
1591-- Get the size of dynamic block without writing any bits into the writer.
1592-- @param ... Read the source code of GetBlockDynamicHuffmanHeader()
1593-- @return the bit length of the dynamic block
1594local function GetDynamicHuffmanBlockSize(lcodes, dcodes, HCLEN
1595 , rle_codes_huffman_bitlens, rle_deflate_codes
1596 , lcodes_huffman_bitlens, dcodes_huffman_bitlens)
1597
1598 local block_bitlen = 17 -- 1+2+5+5+4
1599 block_bitlen = block_bitlen + (HCLEN+4)*3
1600
1601 for i = 1, #rle_deflate_codes do
1602 local code = rle_deflate_codes[i]
1603 block_bitlen = block_bitlen + rle_codes_huffman_bitlens[code]
1604 if code >= 16 then
1605 block_bitlen = block_bitlen +
1606 ((code == 16) and 2 or (code == 17 and 3 or 7))
1607 end
1608 end
1609
1610 local length_code_count = 0
1611 for i = 1, #lcodes do
1612 local code = lcodes[i]
1613 local huffman_bitlen = lcodes_huffman_bitlens[code]
1614 block_bitlen = block_bitlen + huffman_bitlen
1615 if code > 256 then -- Length code
1616 length_code_count = length_code_count + 1
1617 if code > 264 and code < 285 then -- Length code with extra bits
1618 local extra_bits_bitlen =
1619 _literal_deflate_code_to_extra_bitlen[code-256]
1620 block_bitlen = block_bitlen + extra_bits_bitlen
1621 end
1622 local dist_code = dcodes[length_code_count]
1623 local dist_huffman_bitlen = dcodes_huffman_bitlens[dist_code]
1624 block_bitlen = block_bitlen + dist_huffman_bitlen
1625
1626 if dist_code > 3 then -- dist code with extra bits
1627 local dist_extra_bits_bitlen = (dist_code-dist_code%2)/2 - 1
1628 block_bitlen = block_bitlen + dist_extra_bits_bitlen
1629 end
1630 end
1631 end
1632 return block_bitlen
1633end
1634
1635-- Write dynamic block.
1636-- @param ... Read the source code of GetBlockDynamicHuffmanHeader()
1637local function CompressDynamicHuffmanBlock(WriteBits, is_last_block
1638 , lcodes, lextra_bits, dcodes, dextra_bits, HLIT, HDIST, HCLEN
1639 , rle_codes_huffman_bitlens, rle_codes_huffman_codes
1640 , rle_deflate_codes, rle_extra_bits
1641 , lcodes_huffman_bitlens, lcodes_huffman_codes
1642 , dcodes_huffman_bitlens, dcodes_huffman_codes)
1643
1644 WriteBits(is_last_block and 1 or 0, 1) -- Last block identifier
1645 WriteBits(2, 2) -- Dynamic Huffman block identifier
1646
1647 WriteBits(HLIT, 5)
1648 WriteBits(HDIST, 5)
1649 WriteBits(HCLEN, 4)
1650
1651 for i = 1, HCLEN+4 do
1652 local symbol = _rle_codes_huffman_bitlen_order[i]
1653 local length = rle_codes_huffman_bitlens[symbol] or 0
1654 WriteBits(length, 3)
1655 end
1656
1657 local rleExtraBitsIndex = 1
1658 for i=1, #rle_deflate_codes do
1659 local code = rle_deflate_codes[i]
1660 WriteBits(rle_codes_huffman_codes[code]
1661 , rle_codes_huffman_bitlens[code])
1662 if code >= 16 then
1663 local extraBits = rle_extra_bits[rleExtraBitsIndex]
1664 WriteBits(extraBits, (code == 16) and 2 or (code == 17 and 3 or 7))
1665 rleExtraBitsIndex = rleExtraBitsIndex + 1
1666 end
1667 end
1668
1669 local length_code_count = 0
1670 local length_code_with_extra_count = 0
1671 local dist_code_with_extra_count = 0
1672
1673 for i=1, #lcodes do
1674 local deflate_codee = lcodes[i]
1675 local huffman_code = lcodes_huffman_codes[deflate_codee]
1676 local huffman_bitlen = lcodes_huffman_bitlens[deflate_codee]
1677 WriteBits(huffman_code, huffman_bitlen)
1678 if deflate_codee > 256 then -- Length code
1679 length_code_count = length_code_count + 1
1680 if deflate_codee > 264 and deflate_codee < 285 then
1681 -- Length code with extra bits
1682 length_code_with_extra_count = length_code_with_extra_count + 1
1683 local extra_bits = lextra_bits[length_code_with_extra_count]
1684 local extra_bits_bitlen =
1685 _literal_deflate_code_to_extra_bitlen[deflate_codee-256]
1686 WriteBits(extra_bits, extra_bits_bitlen)
1687 end
1688 -- Write distance code
1689 local dist_deflate_code = dcodes[length_code_count]
1690 local dist_huffman_code = dcodes_huffman_codes[dist_deflate_code]
1691 local dist_huffman_bitlen =
1692 dcodes_huffman_bitlens[dist_deflate_code]
1693 WriteBits(dist_huffman_code, dist_huffman_bitlen)
1694
1695 if dist_deflate_code > 3 then -- dist code with extra bits
1696 dist_code_with_extra_count = dist_code_with_extra_count + 1
1697 local dist_extra_bits = dextra_bits[dist_code_with_extra_count]
1698 local dist_extra_bits_bitlen =
1699 (dist_deflate_code-dist_deflate_code%2)/2 - 1
1700 WriteBits(dist_extra_bits, dist_extra_bits_bitlen)
1701 end
1702 end
1703 end
1704end
1705
1706-- Get the size of fixed block without writing any bits into the writer.
1707-- @param lcodes literal/LZ77_length deflate codes
1708-- @param decodes LZ77 distance deflate codes
1709-- @return the bit length of the fixed block
1710local function GetFixedHuffmanBlockSize(lcodes, dcodes)
1711 local block_bitlen = 3
1712 local length_code_count = 0
1713 for i=1, #lcodes do
1714 local code = lcodes[i]
1715 local huffman_bitlen = _fix_block_literal_huffman_bitlen[code]
1716 block_bitlen = block_bitlen + huffman_bitlen
1717 if code > 256 then -- Length code
1718 length_code_count = length_code_count + 1
1719 if code > 264 and code < 285 then -- Length code with extra bits
1720 local extra_bits_bitlen =
1721 _literal_deflate_code_to_extra_bitlen[code-256]
1722 block_bitlen = block_bitlen + extra_bits_bitlen
1723 end
1724 local dist_code = dcodes[length_code_count]
1725 block_bitlen = block_bitlen + 5
1726
1727 if dist_code > 3 then -- dist code with extra bits
1728 local dist_extra_bits_bitlen =
1729 (dist_code-dist_code%2)/2 - 1
1730 block_bitlen = block_bitlen + dist_extra_bits_bitlen
1731 end
1732 end
1733 end
1734 return block_bitlen
1735end
1736
1737-- Write fixed block.
1738-- @param lcodes literal/LZ77_length deflate codes
1739-- @param decodes LZ77 distance deflate codes
1740local function CompressFixedHuffmanBlock(WriteBits, is_last_block,
1741 lcodes, lextra_bits, dcodes, dextra_bits)
1742 WriteBits(is_last_block and 1 or 0, 1) -- Last block identifier
1743 WriteBits(1, 2) -- Fixed Huffman block identifier
1744 local length_code_count = 0
1745 local length_code_with_extra_count = 0
1746 local dist_code_with_extra_count = 0
1747 for i=1, #lcodes do
1748 local deflate_code = lcodes[i]
1749 local huffman_code = _fix_block_literal_huffman_code[deflate_code]
1750 local huffman_bitlen = _fix_block_literal_huffman_bitlen[deflate_code]
1751 WriteBits(huffman_code, huffman_bitlen)
1752 if deflate_code > 256 then -- Length code
1753 length_code_count = length_code_count + 1
1754 if deflate_code > 264 and deflate_code < 285 then
1755 -- Length code with extra bits
1756 length_code_with_extra_count = length_code_with_extra_count + 1
1757 local extra_bits = lextra_bits[length_code_with_extra_count]
1758 local extra_bits_bitlen =
1759 _literal_deflate_code_to_extra_bitlen[deflate_code-256]
1760 WriteBits(extra_bits, extra_bits_bitlen)
1761 end
1762 -- Write distance code
1763 local dist_code = dcodes[length_code_count]
1764 local dist_huffman_code = _fix_block_dist_huffman_code[dist_code]
1765 WriteBits(dist_huffman_code, 5)
1766
1767 if dist_code > 3 then -- dist code with extra bits
1768 dist_code_with_extra_count = dist_code_with_extra_count + 1
1769 local dist_extra_bits = dextra_bits[dist_code_with_extra_count]
1770 local dist_extra_bits_bitlen = (dist_code-dist_code%2)/2 - 1
1771 WriteBits(dist_extra_bits, dist_extra_bits_bitlen)
1772 end
1773 end
1774 end
1775end
1776
1777-- Get the size of store block without writing any bits into the writer.
1778-- @param block_start The start index of the origin input string
1779-- @param block_end The end index of the origin input string
1780-- @param Total bit lens had been written into the compressed result before,
1781-- because store block needs to shift to byte boundary.
1782-- @return the bit length of the fixed block
1783local function GetStoreBlockSize(block_start, block_end, total_bitlen)
1784 assert(block_end-block_start+1 <= 65535)
1785 local block_bitlen = 3
1786 total_bitlen = total_bitlen + 3
1787 local padding_bitlen = (8-total_bitlen%8)%8
1788 block_bitlen = block_bitlen + padding_bitlen
1789 block_bitlen = block_bitlen + 32
1790 block_bitlen = block_bitlen + (block_end - block_start + 1) * 8
1791 return block_bitlen
1792end
1793
1794-- Write the store block.
1795-- @param ... lots of stuffs
1796-- @return nil
1797local function CompressStoreBlock(WriteBits, WriteString, is_last_block, str
1798 , block_start, block_end, total_bitlen)
1799 assert(block_end-block_start+1 <= 65535)
1800 WriteBits(is_last_block and 1 or 0, 1) -- Last block identifer.
1801 WriteBits(0, 2) -- Store block identifier.
1802 total_bitlen = total_bitlen + 3
1803 local padding_bitlen = (8-total_bitlen%8)%8
1804 if padding_bitlen > 0 then
1805 WriteBits(_pow2[padding_bitlen]-1, padding_bitlen)
1806 end
1807 local size = block_end - block_start + 1
1808 WriteBits(size, 16)
1809
1810 -- Write size's one's complement
1811 local comp = (255 - size % 256) + (255 - (size-size%256)/256)*256
1812 WriteBits(comp, 16)
1813
1814 WriteString(str:sub(block_start, block_end))
1815end
1816
1817-- Do the deflate
1818-- Currently using a simple way to determine the block size
1819-- (This is why the compression ratio is little bit worse than zlib when
1820-- the input size is very large
1821-- The first block is 64KB, the following block is 32KB.
1822-- After each block, there is a memory cleanup operation.
1823-- This is not a fast operation, but it is needed to save memory usage, so
1824-- the memory usage does not grow unboundly. If the data size is less than
1825-- 64KB, then memory cleanup won't happen.
1826-- This function determines whether to use store/fixed/dynamic blocks by
1827-- calculating the block size of each block type and chooses the smallest one.
1828local function Deflate(configs, WriteBits, WriteString, FlushWriter, str
1829 , dictionary)
1830 local string_table = {}
1831 local hash_tables = {}
1832 local is_last_block = nil
1833 local block_start
1834 local block_end
1835 local bitlen_written
1836 local total_bitlen = FlushWriter(_FLUSH_MODE_NO_FLUSH)
1837 local strlen = #str
1838 local offset
1839
1840 local level
1841 local strategy
1842 if configs then
1843 if configs.level then
1844 level = configs.level
1845 end
1846 if configs.strategy then
1847 strategy = configs.strategy
1848 end
1849 end
1850
1851 if not level then
1852 if strlen < 2048 then
1853 level = 7
1854 elseif strlen > 65536 then
1855 level = 3
1856 else
1857 level = 5
1858 end
1859 end
1860
1861 while not is_last_block do
1862 if not block_start then
1863 block_start = 1
1864 block_end = 64*1024 - 1
1865 offset = 0
1866 else
1867 block_start = block_end + 1
1868 block_end = block_end + 32*1024
1869 offset = block_start - 32*1024 - 1
1870 end
1871
1872 if block_end >= strlen then
1873 block_end = strlen
1874 is_last_block = true
1875 else
1876 is_last_block = false
1877 end
1878
1879 local lcodes, lextra_bits, lcodes_counts, dcodes, dextra_bits
1880 , dcodes_counts
1881
1882 local HLIT, HDIST, HCLEN, rle_codes_huffman_bitlens
1883 , rle_codes_huffman_codes, rle_deflate_codes
1884 , rle_extra_bits, lcodes_huffman_bitlens, lcodes_huffman_codes
1885 , dcodes_huffman_bitlens, dcodes_huffman_codes
1886
1887 local dynamic_block_bitlen
1888 local fixed_block_bitlen
1889 local store_block_bitlen
1890
1891 if level ~= 0 then
1892
1893 -- GetBlockLZ77 needs block_start to block_end+3 to be loaded.
1894 LoadStringToTable(str, string_table, block_start, block_end + 3
1895 , offset)
1896 if block_start == 1 and dictionary then
1897 local dict_string_table = dictionary.string_table
1898 local dict_strlen = dictionary.strlen
1899 for i=0, (-dict_strlen+1)<-257
1900 and -257 or (-dict_strlen+1), -1 do
1901 string_table[i] = dict_string_table[dict_strlen+i]
1902 end
1903 end
1904
1905 if strategy == "huffman_only" then
1906 lcodes = {}
1907 LoadStringToTable(str, lcodes, block_start, block_end
1908 , block_start-1)
1909 lextra_bits = {}
1910 lcodes_counts = {}
1911 lcodes[block_end - block_start+2] = 256 -- end of block
1912 for i=1, block_end - block_start+2 do
1913 local code = lcodes[i]
1914 lcodes_counts[code] = (lcodes_counts[code] or 0) + 1
1915 end
1916 dcodes = {}
1917 dextra_bits = {}
1918 dcodes_counts = {}
1919 else
1920 lcodes, lextra_bits, lcodes_counts, dcodes, dextra_bits
1921 , dcodes_counts = GetBlockLZ77Result(level, string_table
1922 , hash_tables, block_start, block_end, offset, dictionary
1923 )
1924 end
1925
1926 HLIT, HDIST, HCLEN, rle_codes_huffman_bitlens
1927 , rle_codes_huffman_codes, rle_deflate_codes
1928 , rle_extra_bits, lcodes_huffman_bitlens, lcodes_huffman_codes
1929 , dcodes_huffman_bitlens, dcodes_huffman_codes =
1930 GetBlockDynamicHuffmanHeader(lcodes_counts, dcodes_counts)
1931 dynamic_block_bitlen = GetDynamicHuffmanBlockSize(
1932 lcodes, dcodes, HCLEN, rle_codes_huffman_bitlens
1933 , rle_deflate_codes, lcodes_huffman_bitlens
1934 , dcodes_huffman_bitlens)
1935 fixed_block_bitlen = GetFixedHuffmanBlockSize(lcodes, dcodes)
1936 end
1937
1938 store_block_bitlen = GetStoreBlockSize(block_start, block_end
1939 , total_bitlen)
1940
1941 local min_bitlen = store_block_bitlen
1942 min_bitlen = (fixed_block_bitlen and fixed_block_bitlen < min_bitlen)
1943 and fixed_block_bitlen or min_bitlen
1944 min_bitlen = (dynamic_block_bitlen
1945 and dynamic_block_bitlen < min_bitlen)
1946 and dynamic_block_bitlen or min_bitlen
1947
1948 if level == 0 or (strategy ~= "fixed" and strategy ~= "dynamic" and
1949 store_block_bitlen == min_bitlen) then
1950 CompressStoreBlock(WriteBits, WriteString, is_last_block
1951 , str, block_start, block_end, total_bitlen)
1952 total_bitlen = total_bitlen + store_block_bitlen
1953 elseif strategy ~= "dynamic" and (
1954 strategy == "fixed" or fixed_block_bitlen == min_bitlen) then
1955 CompressFixedHuffmanBlock(WriteBits, is_last_block,
1956 lcodes, lextra_bits, dcodes, dextra_bits)
1957 total_bitlen = total_bitlen + fixed_block_bitlen
1958 elseif strategy == "dynamic" or dynamic_block_bitlen == min_bitlen then
1959 CompressDynamicHuffmanBlock(WriteBits, is_last_block, lcodes
1960 , lextra_bits, dcodes, dextra_bits, HLIT, HDIST, HCLEN
1961 , rle_codes_huffman_bitlens, rle_codes_huffman_codes
1962 , rle_deflate_codes, rle_extra_bits
1963 , lcodes_huffman_bitlens, lcodes_huffman_codes
1964 , dcodes_huffman_bitlens, dcodes_huffman_codes)
1965 total_bitlen = total_bitlen + dynamic_block_bitlen
1966 end
1967
1968 if is_last_block then
1969 bitlen_written = FlushWriter(_FLUSH_MODE_NO_FLUSH)
1970 else
1971 bitlen_written = FlushWriter(_FLUSH_MODE_MEMORY_CLEANUP)
1972 end
1973
1974 assert(bitlen_written == total_bitlen)
1975
1976 -- Memory clean up, so memory consumption does not always grow linearly
1977 -- , even if input string is > 64K.
1978 -- Not a very efficient operation, but this operation won't happen
1979 -- when the input data size is less than 64K.
1980 if not is_last_block then
1981 local j
1982 if dictionary and block_start == 1 then
1983 j = 0
1984 while (string_table[j]) do
1985 string_table[j] = nil
1986 j = j - 1
1987 end
1988 end
1989 dictionary = nil
1990 j = 1
1991 for i = block_end-32767, block_end do
1992 string_table[j] = string_table[i-offset]
1993 j = j + 1
1994 end
1995 for k, t in pairs(hash_tables) do
1996 local tSize = #t
1997 if tSize > 0 and block_end+1 - t[1] > 32768 then
1998 if tSize == 1 then
1999 --hash_tables[k] = nil -- seems to be causing problems
2000 else
2001 local new = {}
2002 local newSize = 0
2003 for i = 2, tSize do
2004 j = t[i]
2005 if block_end+1 - j <= 32768 then
2006 newSize = newSize + 1
2007 new[newSize] = j
2008 end
2009 end
2010 hash_tables[k] = new
2011 end
2012 end
2013 end
2014 end
2015
2016 if os and os.pullEvent then -- ComputerCraft requires this for long-running processes
2017 os.queueEvent("nosleep")
2018 os.pullEvent()
2019 end
2020 end
2021end
2022
2023--- The description to compression configuration table. <br>
2024-- Any field can be nil to use its default. <br>
2025-- Table with keys other than those below is an invalid table.
2026-- @class table
2027-- @name compression_configs
2028-- @field level The compression level ranged from 0 to 9. 0 is no compression.
2029-- 9 is the slowest but best compression. Use nil for default level.
2030-- @field strategy The compression strategy. "fixed" to only use fixed deflate
2031-- compression block. "dynamic" to only use dynamic block. "huffman_only" to
2032-- do no LZ77 compression. Only do huffman compression.
2033
2034
2035-- @see LibDeflate:CompressDeflate(str, configs)
2036-- @see LibDeflate:CompressDeflateWithDict(str, dictionary, configs)
2037local function CompressDeflateInternal(str, dictionary, configs)
2038 local WriteBits, WriteString, FlushWriter = CreateWriter()
2039 Deflate(configs, WriteBits, WriteString, FlushWriter, str, dictionary)
2040 local total_bitlen, result = FlushWriter(_FLUSH_MODE_OUTPUT)
2041 local padding_bitlen = (8-total_bitlen%8)%8
2042 return result, padding_bitlen
2043end
2044
2045-- @see LibDeflate:CompressZlib
2046-- @see LibDeflate:CompressZlibWithDict
2047local function CompressZlibInternal(str, dictionary, configs)
2048 local WriteBits, WriteString, FlushWriter = CreateWriter()
2049
2050 local CM = 8 -- Compression method
2051 local CINFO = 7 --Window Size = 32K
2052 local CMF = CINFO*16+CM
2053 WriteBits(CMF, 8)
2054
2055 local FDIST = dictionary and 1 or 0
2056 local FLEVEL = 2 -- Default compression
2057 local FLG = FLEVEL*64+FDIST*32
2058 local FCHECK = (31-(CMF*256+FLG)%31)
2059 -- The FCHECK value must be such that CMF and FLG,
2060 -- when viewed as a 16-bit unsigned integer stored
2061 -- in MSB order (CMF*256 + FLG), is a multiple of 31.
2062 FLG = FLG + FCHECK
2063 WriteBits(FLG, 8)
2064
2065 if FDIST == 1 then
2066 local adler32 = dictionary.adler32
2067 local byte0 = adler32 % 256
2068 adler32 = (adler32 - byte0) / 256
2069 local byte1 = adler32 % 256
2070 adler32 = (adler32 - byte1) / 256
2071 local byte2 = adler32 % 256
2072 adler32 = (adler32 - byte2) / 256
2073 local byte3 = adler32 % 256
2074 WriteBits(byte3, 8)
2075 WriteBits(byte2, 8)
2076 WriteBits(byte1, 8)
2077 WriteBits(byte0, 8)
2078 end
2079
2080 Deflate(configs, WriteBits, WriteString, FlushWriter, str, dictionary)
2081 FlushWriter(_FLUSH_MODE_BYTE_BOUNDARY)
2082
2083 local adler32 = LibDeflate:Adler32(str)
2084
2085 -- Most significant byte first
2086 local byte3 = adler32%256
2087 adler32 = (adler32 - byte3) / 256
2088 local byte2 = adler32%256
2089 adler32 = (adler32 - byte2) / 256
2090 local byte1 = adler32%256
2091 adler32 = (adler32 - byte1) / 256
2092 local byte0 = adler32%256
2093
2094 WriteBits(byte0, 8)
2095 WriteBits(byte1, 8)
2096 WriteBits(byte2, 8)
2097 WriteBits(byte3, 8)
2098 local total_bitlen, result = FlushWriter(_FLUSH_MODE_OUTPUT)
2099 local padding_bitlen = (8-total_bitlen%8)%8
2100 return result, padding_bitlen
2101end
2102
2103--- Compress using the raw deflate format.
2104-- @param str [string] The data to be compressed.
2105-- @param configs [table/nil] The configuration table to control the compression
2106-- . If nil, use the default configuration.
2107-- @return [string] The compressed data.
2108-- @return [integer] The number of bits padded at the end of output.
2109-- 0 <= bits < 8 <br>
2110-- This means the most significant "bits" of the last byte of the returned
2111-- compressed data are padding bits and they don't affect decompression.
2112-- You don't need to use this value unless you want to do some postprocessing
2113-- to the compressed data.
2114-- @see compression_configs
2115-- @see LibDeflate:DecompressDeflate
2116function LibDeflate:CompressDeflate(str, configs)
2117 local arg_valid, arg_err = IsValidArguments(str, false, nil, true, configs)
2118 if not arg_valid then
2119 error(("Usage: LibDeflate:CompressDeflate(str, configs): "
2120 ..arg_err), 2)
2121 end
2122 return CompressDeflateInternal(str, nil, configs)
2123end
2124
2125--- Compress using the raw deflate format with a preset dictionary.
2126-- @param str [string] The data to be compressed.
2127-- @param dictionary [table] The preset dictionary produced by
2128-- LibDeflate:CreateDictionary
2129-- @param configs [table/nil] The configuration table to control the compression
2130-- . If nil, use the default configuration.
2131-- @return [string] The compressed data.
2132-- @return [integer] The number of bits padded at the end of output.
2133-- 0 <= bits < 8 <br>
2134-- This means the most significant "bits" of the last byte of the returned
2135-- compressed data are padding bits and they don't affect decompression.
2136-- You don't need to use this value unless you want to do some postprocessing
2137-- to the compressed data.
2138-- @see compression_configs
2139-- @see LibDeflate:CreateDictionary
2140-- @see LibDeflate:DecompressDeflateWithDict
2141function LibDeflate:CompressDeflateWithDict(str, dictionary, configs)
2142 local arg_valid, arg_err = IsValidArguments(str, true, dictionary
2143 , true, configs)
2144 if not arg_valid then
2145 error(("Usage: LibDeflate:CompressDeflateWithDict"
2146 .."(str, dictionary, configs): "
2147 ..arg_err), 2)
2148 end
2149 return CompressDeflateInternal(str, dictionary, configs)
2150end
2151
2152--- Compress using the zlib format.
2153-- @param str [string] the data to be compressed.
2154-- @param configs [table/nil] The configuration table to control the compression
2155-- . If nil, use the default configuration.
2156-- @return [string] The compressed data.
2157-- @return [integer] The number of bits padded at the end of output.
2158-- Should always be 0.
2159-- Zlib formatted compressed data never has padding bits at the end.
2160-- @see compression_configs
2161-- @see LibDeflate:DecompressZlib
2162function LibDeflate:CompressZlib(str, configs)
2163 local arg_valid, arg_err = IsValidArguments(str, false, nil, true, configs)
2164 if not arg_valid then
2165 error(("Usage: LibDeflate:CompressZlib(str, configs): "
2166 ..arg_err), 2)
2167 end
2168 return CompressZlibInternal(str, nil, configs)
2169end
2170
2171--- Compress using the zlib format with a preset dictionary.
2172-- @param str [string] the data to be compressed.
2173-- @param dictionary [table] A preset dictionary produced
2174-- by LibDeflate:CreateDictionary()
2175-- @param configs [table/nil] The configuration table to control the compression
2176-- . If nil, use the default configuration.
2177-- @return [string] The compressed data.
2178-- @return [integer] The number of bits padded at the end of output.
2179-- Should always be 0.
2180-- Zlib formatted compressed data never has padding bits at the end.
2181-- @see compression_configs
2182-- @see LibDeflate:CreateDictionary
2183-- @see LibDeflate:DecompressZlibWithDict
2184function LibDeflate:CompressZlibWithDict(str, dictionary, configs)
2185 local arg_valid, arg_err = IsValidArguments(str, true, dictionary
2186 , true, configs)
2187 if not arg_valid then
2188 error(("Usage: LibDeflate:CompressZlibWithDict"
2189 .."(str, dictionary, configs): "
2190 ..arg_err), 2)
2191 end
2192 return CompressZlibInternal(str, dictionary, configs)
2193end
2194
2195local function time()
2196 if os.epoch ~= nil then return math.floor(os.epoch("utc") / 1000)
2197 -- ComputerCraft's os.time() gives in-game time, os.epoch gives POSIX time in ms
2198 elseif os.time() < 30 then return 0 -- ComputerCraft 1.79 and below don't have os.epoch(), so no time.
2199 else return os.time() end -- All other Luas.
2200end
2201
2202local function byte(num, b) return band(rshift(num, b * 8), 0xFF) end
2203
2204--- Compress using the gzip format.
2205-- @param str [string] the data to be compressed.
2206-- @param configs [table/nil] The configuration table to control the compression
2207-- . If nil, use the default configuration.
2208-- @return [string] The compressed data with gzip headers.
2209-- @see compression_configs
2210-- @see LibDeflate:DecompressGzip
2211function LibDeflate:CompressGzip(str, configs)
2212 local arg_valid, arg_err = IsValidArguments(str, false, nil, true, configs)
2213 if not arg_valid then
2214 error(("Usage: LibDeflate:CompressGzip(str, configs): "
2215 ..arg_err), 2)
2216 end
2217 local res, err = CompressDeflateInternal(str, nil, configs)
2218 if res == nil then return res, err end
2219 local t = time()
2220 local cf = 0
2221 local crc = self:CRC32(str)
2222 local len = string.len(str)
2223 if configs ~= nil and configs.level ~= nil then
2224 if configs.level == 0 then cf = 0x04
2225 elseif configs.level == 9 then cf = 0x02 end
2226 end
2227 return string_char(0x1f, 0x8b, 8, 0, byte(t, 0), byte(t, 1), byte(t, 2),
2228 byte(t, 3), cf, 0xFF) .. res .. string_char(byte(crc, 0), byte(crc, 1),
2229 byte(crc, 2), byte(crc, 3), byte(len, 0), byte(len, 1), byte(len, 2), byte(len, 3))
2230end
2231
2232--[[ --------------------------------------------------------------------------
2233 Decompress code
2234--]] --------------------------------------------------------------------------
2235
2236--[[
2237 Create a reader to easily reader stuffs as the unit of bits.
2238 Return values:
2239 1. ReadBits(bitlen)
2240 2. ReadBytes(bytelen, buffer, buffer_size)
2241 3. Decode(huffman_bitlen_count, huffman_symbol, min_bitlen)
2242 4. ReaderBitlenLeft()
2243 5. SkipToByteBoundary()
2244--]]
2245local function CreateReader(input_string)
2246 local input = input_string
2247 local input_strlen = #input_string
2248 local input_next_byte_pos = 1
2249 local cache_bitlen = 0
2250 local cache = 0
2251
2252 -- Read some bits.
2253 -- To improve speed, this function does not
2254 -- check if the input has been exhausted.
2255 -- Use ReaderBitlenLeft() < 0 to check it.
2256 -- @param bitlen the number of bits to read
2257 -- @return the data is read.
2258 local function ReadBits(bitlen)
2259 local rshift_mask = _pow2[bitlen]
2260 local code
2261 if bitlen <= cache_bitlen then
2262 code = cache % rshift_mask
2263 cache = (cache - code) / rshift_mask
2264 cache_bitlen = cache_bitlen - bitlen
2265 else -- Whether input has been exhausted is not checked.
2266 local lshift_mask = _pow2[cache_bitlen]
2267 local byte1, byte2, byte3, byte4 = string_byte(input
2268 , input_next_byte_pos, input_next_byte_pos+3)
2269 -- This requires lua number to be at least double ()
2270 cache = cache + ((byte1 or 0)+(byte2 or 0)*256
2271 + (byte3 or 0)*65536+(byte4 or 0)*16777216)*lshift_mask
2272 input_next_byte_pos = input_next_byte_pos + 4
2273 cache_bitlen = cache_bitlen + 32 - bitlen
2274 code = cache % rshift_mask
2275 cache = (cache - code) / rshift_mask
2276 end
2277 return code
2278 end
2279
2280 -- Read some bytes from the reader.
2281 -- Assume reader is on the byte boundary.
2282 -- @param bytelen The number of bytes to be read.
2283 -- @param buffer The byte read will be stored into this buffer.
2284 -- @param buffer_size The buffer will be modified starting from
2285 -- buffer[buffer_size+1], ending at buffer[buffer_size+bytelen-1]
2286 -- @return the new buffer_size
2287 local function ReadBytes(bytelen, buffer, buffer_size)
2288 assert(cache_bitlen % 8 == 0)
2289
2290 local byte_from_cache = (cache_bitlen/8 < bytelen)
2291 and (cache_bitlen/8) or bytelen
2292 for _=1, byte_from_cache do
2293 local byte = cache % 256
2294 buffer_size = buffer_size + 1
2295 buffer[buffer_size] = string_char(byte)
2296 cache = (cache - byte) / 256
2297 end
2298 cache_bitlen = cache_bitlen - byte_from_cache*8
2299 bytelen = bytelen - byte_from_cache
2300 if (input_strlen - input_next_byte_pos - bytelen + 1) * 8
2301 + cache_bitlen < 0 then
2302 return -1 -- out of input
2303 end
2304 for i=input_next_byte_pos, input_next_byte_pos+bytelen-1 do
2305 buffer_size = buffer_size + 1
2306 buffer[buffer_size] = string_sub(input, i, i)
2307 end
2308
2309 input_next_byte_pos = input_next_byte_pos + bytelen
2310 return buffer_size
2311 end
2312
2313 -- Decode huffman code
2314 -- To improve speed, this function does not check
2315 -- if the input has been exhausted.
2316 -- Use ReaderBitlenLeft() < 0 to check it.
2317 -- Credits for Mark Adler. This code is from puff:Decode()
2318 -- @see puff:Decode(...)
2319 -- @param huffman_bitlen_count
2320 -- @param huffman_symbol
2321 -- @param min_bitlen The minimum huffman bit length of all symbols
2322 -- @return The decoded deflate code.
2323 -- Negative value is returned if decoding fails.
2324 local function Decode(huffman_bitlen_counts, huffman_symbols, min_bitlen)
2325 local code = 0
2326 local first = 0
2327 local index = 0
2328 local count
2329 if min_bitlen > 0 then
2330 if cache_bitlen < 15 and input then
2331 local lshift_mask = _pow2[cache_bitlen]
2332 local byte1, byte2, byte3, byte4 =
2333 string_byte(input, input_next_byte_pos
2334 , input_next_byte_pos+3)
2335 -- This requires lua number to be at least double ()
2336 cache = cache + ((byte1 or 0)+(byte2 or 0)*256
2337 +(byte3 or 0)*65536+(byte4 or 0)*16777216)*lshift_mask
2338 input_next_byte_pos = input_next_byte_pos + 4
2339 cache_bitlen = cache_bitlen + 32
2340 end
2341
2342 local rshift_mask = _pow2[min_bitlen]
2343 cache_bitlen = cache_bitlen - min_bitlen
2344 code = cache % rshift_mask
2345 cache = (cache - code) / rshift_mask
2346 -- Reverse the bits
2347 code = _reverse_bits_tbl[min_bitlen][code]
2348
2349 count = huffman_bitlen_counts[min_bitlen]
2350 if code < count then
2351 return huffman_symbols[code]
2352 end
2353 index = count
2354 first = count * 2
2355 code = code * 2
2356 end
2357
2358 for bitlen = min_bitlen+1, 15 do
2359 local bit
2360 bit = cache % 2
2361 cache = (cache - bit) / 2
2362 cache_bitlen = cache_bitlen - 1
2363
2364 code = (bit==1) and (code + 1 - code % 2) or code
2365 count = huffman_bitlen_counts[bitlen] or 0
2366 local diff = code - first
2367 if diff < count then
2368 return huffman_symbols[index + diff]
2369 end
2370 index = index + count
2371 first = first + count
2372 first = first * 2
2373 code = code * 2
2374 end
2375 -- invalid literal/length or distance code
2376 -- in fixed or dynamic block (run out of code)
2377 return -10
2378 end
2379
2380 local function ReaderBitlenLeft()
2381 return (input_strlen - input_next_byte_pos + 1) * 8 + cache_bitlen
2382 end
2383
2384 local function SkipToByteBoundary()
2385 local skipped_bitlen = cache_bitlen%8
2386 local rshift_mask = _pow2[skipped_bitlen]
2387 cache_bitlen = cache_bitlen - skipped_bitlen
2388 cache = (cache - cache % rshift_mask) / rshift_mask
2389 end
2390
2391 return ReadBits, ReadBytes, Decode, ReaderBitlenLeft, SkipToByteBoundary
2392end
2393
2394-- Create a deflate state, so I can pass in less arguments to functions.
2395-- @param str the whole string to be decompressed.
2396-- @param dictionary The preset dictionary. nil if not provided.
2397-- This dictionary should be produced by LibDeflate:CreateDictionary(str)
2398-- @return The decomrpess state.
2399local function CreateDecompressState(str, dictionary)
2400 local ReadBits, ReadBytes, Decode, ReaderBitlenLeft
2401 , SkipToByteBoundary = CreateReader(str)
2402 local state =
2403 {
2404 ReadBits = ReadBits,
2405 ReadBytes = ReadBytes,
2406 Decode = Decode,
2407 ReaderBitlenLeft = ReaderBitlenLeft,
2408 SkipToByteBoundary = SkipToByteBoundary,
2409 buffer_size = 0,
2410 buffer = {},
2411 result_buffer = {},
2412 dictionary = dictionary,
2413 }
2414 return state
2415end
2416
2417-- Get the stuffs needed to decode huffman codes
2418-- @see puff.c:construct(...)
2419-- @param huffman_bitlen The huffman bit length of the huffman codes.
2420-- @param max_symbol The maximum symbol
2421-- @param max_bitlen The min huffman bit length of all codes
2422-- @return zero or positive for success, negative for failure.
2423-- @return The count of each huffman bit length.
2424-- @return A table to convert huffman codes to deflate codes.
2425-- @return The minimum huffman bit length.
2426local function GetHuffmanForDecode(huffman_bitlens, max_symbol, max_bitlen)
2427 local huffman_bitlen_counts = {}
2428 local min_bitlen = max_bitlen
2429 for symbol = 0, max_symbol do
2430 local bitlen = huffman_bitlens[symbol] or 0
2431 min_bitlen = (bitlen > 0 and bitlen < min_bitlen)
2432 and bitlen or min_bitlen
2433 huffman_bitlen_counts[bitlen] = (huffman_bitlen_counts[bitlen] or 0)+1
2434 end
2435
2436 if huffman_bitlen_counts[0] == max_symbol+1 then -- No Codes
2437 return 0, huffman_bitlen_counts, {}, 0 -- Complete, but decode will fail
2438 end
2439
2440 local left = 1
2441 for len = 1, max_bitlen do
2442 left = left * 2
2443 left = left - (huffman_bitlen_counts[len] or 0)
2444 if left < 0 then
2445 return left -- Over-subscribed, return negative
2446 end
2447 end
2448
2449 -- Generate offsets info symbol table for each length for sorting
2450 local offsets = {}
2451 offsets[1] = 0
2452 for len = 1, max_bitlen-1 do
2453 offsets[len + 1] = offsets[len] + (huffman_bitlen_counts[len] or 0)
2454 end
2455
2456 local huffman_symbols = {}
2457 for symbol = 0, max_symbol do
2458 local bitlen = huffman_bitlens[symbol] or 0
2459 if bitlen ~= 0 then
2460 local offset = offsets[bitlen]
2461 huffman_symbols[offset] = symbol
2462 offsets[bitlen] = offsets[bitlen] + 1
2463 end
2464 end
2465
2466 -- Return zero for complete set, positive for incomplete set.
2467 return left, huffman_bitlen_counts, huffman_symbols, min_bitlen
2468end
2469
2470-- Decode a fixed or dynamic huffman blocks, excluding last block identifier
2471-- and block type identifer.
2472-- @see puff.c:codes()
2473-- @param state decompression state that will be modified by this function.
2474-- @see CreateDecompressState
2475-- @param ... Read the source code
2476-- @return 0 on success, other value on failure.
2477local function DecodeUntilEndOfBlock(state, lcodes_huffman_bitlens
2478 , lcodes_huffman_symbols, lcodes_huffman_min_bitlen
2479 , dcodes_huffman_bitlens, dcodes_huffman_symbols
2480 , dcodes_huffman_min_bitlen)
2481 local buffer, buffer_size, ReadBits, Decode, ReaderBitlenLeft
2482 , result_buffer =
2483 state.buffer, state.buffer_size, state.ReadBits, state.Decode
2484 , state.ReaderBitlenLeft, state.result_buffer
2485 local dictionary = state.dictionary
2486 local dict_string_table
2487 local dict_strlen
2488
2489 local buffer_end = 1
2490 if dictionary and not buffer[0] then
2491 -- If there is a dictionary, copy the last 258 bytes into
2492 -- the string_table to make the copy in the main loop quicker.
2493 -- This is done only once per decompression.
2494 dict_string_table = dictionary.string_table
2495 dict_strlen = dictionary.strlen
2496 buffer_end = -dict_strlen + 1
2497 for i=0, (-dict_strlen+1)<-257 and -257 or (-dict_strlen+1), -1 do
2498 buffer[i] = _byte_to_char[dict_string_table[dict_strlen+i]]
2499 end
2500 end
2501
2502 repeat
2503 local symbol = Decode(lcodes_huffman_bitlens
2504 , lcodes_huffman_symbols, lcodes_huffman_min_bitlen)
2505 if symbol < 0 or symbol > 285 then
2506 -- invalid literal/length or distance code in fixed or dynamic block
2507 return -10
2508 elseif symbol < 256 then -- Literal
2509 buffer_size = buffer_size + 1
2510 buffer[buffer_size] = _byte_to_char[symbol]
2511 elseif symbol > 256 then -- Length code
2512 symbol = symbol - 256
2513 local bitlen = _literal_deflate_code_to_base_len[symbol]
2514 bitlen = (symbol >= 8)
2515 and (bitlen
2516 + ReadBits(_literal_deflate_code_to_extra_bitlen[symbol]))
2517 or bitlen
2518 symbol = Decode(dcodes_huffman_bitlens, dcodes_huffman_symbols
2519 , dcodes_huffman_min_bitlen)
2520 if symbol < 0 or symbol > 29 then
2521 -- invalid literal/length or distance code in fixed or dynamic block
2522 return -10
2523 end
2524 local dist = _dist_deflate_code_to_base_dist[symbol]
2525 dist = (dist > 4) and (dist
2526 + ReadBits(_dist_deflate_code_to_extra_bitlen[symbol])) or dist
2527
2528 local char_buffer_index = buffer_size-dist+1
2529 if char_buffer_index < buffer_end then
2530 -- distance is too far back in fixed or dynamic block
2531 return -11
2532 end
2533 if char_buffer_index >= -257 then
2534 for _=1, bitlen do
2535 buffer_size = buffer_size + 1
2536 buffer[buffer_size] = buffer[char_buffer_index]
2537 char_buffer_index = char_buffer_index + 1
2538 end
2539 else
2540 char_buffer_index = dict_strlen + char_buffer_index
2541 for _=1, bitlen do
2542 buffer_size = buffer_size + 1
2543 buffer[buffer_size] =
2544 _byte_to_char[dict_string_table[char_buffer_index]]
2545 char_buffer_index = char_buffer_index + 1
2546 end
2547 end
2548 end
2549
2550 if ReaderBitlenLeft() < 0 then
2551 return 2 -- available inflate data did not terminate
2552 end
2553
2554 if buffer_size >= 65536 then
2555 result_buffer[#result_buffer+1] =
2556 table_concat(buffer, "", 1, 32768)
2557 for i=32769, buffer_size do
2558 buffer[i-32768] = buffer[i]
2559 end
2560 buffer_size = buffer_size - 32768
2561 buffer[buffer_size+1] = nil
2562 -- NOTE: buffer[32769..end] and buffer[-257..0] are not cleared.
2563 -- This is why "buffer_size" variable is needed.
2564 end
2565 until symbol == 256
2566
2567 state.buffer_size = buffer_size
2568
2569 return 0
2570end
2571
2572-- Decompress a store block
2573-- @param state decompression state that will be modified by this function.
2574-- @return 0 if succeeds, other value if fails.
2575local function DecompressStoreBlock(state)
2576 local buffer, buffer_size, ReadBits, ReadBytes, ReaderBitlenLeft
2577 , SkipToByteBoundary, result_buffer =
2578 state.buffer, state.buffer_size, state.ReadBits, state.ReadBytes
2579 , state.ReaderBitlenLeft, state.SkipToByteBoundary, state.result_buffer
2580
2581 SkipToByteBoundary()
2582 local bytelen = ReadBits(16)
2583 if ReaderBitlenLeft() < 0 then
2584 return 2 -- available inflate data did not terminate
2585 end
2586 local bytelenComp = ReadBits(16)
2587 if ReaderBitlenLeft() < 0 then
2588 return 2 -- available inflate data did not terminate
2589 end
2590
2591 if bytelen % 256 + bytelenComp % 256 ~= 255 then
2592 return -2 -- Not one's complement
2593 end
2594 if (bytelen-bytelen % 256)/256
2595 + (bytelenComp-bytelenComp % 256)/256 ~= 255 then
2596 return -2 -- Not one's complement
2597 end
2598
2599 -- Note that ReadBytes will skip to the next byte boundary first.
2600 buffer_size = ReadBytes(bytelen, buffer, buffer_size)
2601 if buffer_size < 0 then
2602 return 2 -- available inflate data did not terminate
2603 end
2604
2605 -- memory clean up when there are enough bytes in the buffer.
2606 if buffer_size >= 65536 then
2607 result_buffer[#result_buffer+1] = table_concat(buffer, "", 1, 32768)
2608 for i=32769, buffer_size do
2609 buffer[i-32768] = buffer[i]
2610 end
2611 buffer_size = buffer_size - 32768
2612 buffer[buffer_size+1] = nil
2613 end
2614 state.buffer_size = buffer_size
2615 return 0
2616end
2617
2618-- Decompress a fixed block
2619-- @param state decompression state that will be modified by this function.
2620-- @return 0 if succeeds other value if fails.
2621local function DecompressFixBlock(state)
2622 return DecodeUntilEndOfBlock(state
2623 , _fix_block_literal_huffman_bitlen_count
2624 , _fix_block_literal_huffman_to_deflate_code, 7
2625 , _fix_block_dist_huffman_bitlen_count
2626 , _fix_block_dist_huffman_to_deflate_code, 5)
2627end
2628
2629-- Decompress a dynamic block
2630-- @param state decompression state that will be modified by this function.
2631-- @return 0 if success, other value if fails.
2632local function DecompressDynamicBlock(state)
2633 local ReadBits, Decode = state.ReadBits, state.Decode
2634 local nlen = ReadBits(5) + 257
2635 local ndist = ReadBits(5) + 1
2636 local ncode = ReadBits(4) + 4
2637 if nlen > 286 or ndist > 30 then
2638 -- dynamic block code description: too many length or distance codes
2639 return -3
2640 end
2641
2642 local rle_codes_huffman_bitlens = {}
2643
2644 for i = 1, ncode do
2645 rle_codes_huffman_bitlens[_rle_codes_huffman_bitlen_order[i]] =
2646 ReadBits(3)
2647 end
2648
2649 local rle_codes_err, rle_codes_huffman_bitlen_counts,
2650 rle_codes_huffman_symbols, rle_codes_huffman_min_bitlen =
2651 GetHuffmanForDecode(rle_codes_huffman_bitlens, 18, 7)
2652 if rle_codes_err ~= 0 then -- Require complete code set here
2653 -- dynamic block code description: code lengths codes incomplete
2654 return -4
2655 end
2656
2657 local lcodes_huffman_bitlens = {}
2658 local dcodes_huffman_bitlens = {}
2659 -- Read length/literal and distance code length tables
2660 local index = 0
2661 while index < nlen + ndist do
2662 local symbol -- Decoded value
2663 local bitlen -- Last length to repeat
2664
2665 symbol = Decode(rle_codes_huffman_bitlen_counts
2666 , rle_codes_huffman_symbols, rle_codes_huffman_min_bitlen)
2667
2668 if symbol < 0 then
2669 return symbol -- Invalid symbol
2670 elseif symbol < 16 then
2671 if index < nlen then
2672 lcodes_huffman_bitlens[index] = symbol
2673 else
2674 dcodes_huffman_bitlens[index-nlen] = symbol
2675 end
2676 index = index + 1
2677 else
2678 bitlen = 0
2679 if symbol == 16 then
2680 if index == 0 then
2681 -- dynamic block code description: repeat lengths
2682 -- with no first length
2683 return -5
2684 end
2685 if index-1 < nlen then
2686 bitlen = lcodes_huffman_bitlens[index-1]
2687 else
2688 bitlen = dcodes_huffman_bitlens[index-nlen-1]
2689 end
2690 symbol = 3 + ReadBits(2)
2691 elseif symbol == 17 then -- Repeat zero 3..10 times
2692 symbol = 3 + ReadBits(3)
2693 else -- == 18, repeat zero 11.138 times
2694 symbol = 11 + ReadBits(7)
2695 end
2696 if index + symbol > nlen + ndist then
2697 -- dynamic block code description:
2698 -- repeat more than specified lengths
2699 return -6
2700 end
2701 while symbol > 0 do -- Repeat last or zero symbol times
2702 symbol = symbol - 1
2703 if index < nlen then
2704 lcodes_huffman_bitlens[index] = bitlen
2705 else
2706 dcodes_huffman_bitlens[index-nlen] = bitlen
2707 end
2708 index = index + 1
2709 end
2710 end
2711 end
2712
2713 if (lcodes_huffman_bitlens[256] or 0) == 0 then
2714 -- dynamic block code description: missing end-of-block code
2715 return -9
2716 end
2717
2718 local lcodes_err, lcodes_huffman_bitlen_counts
2719 , lcodes_huffman_symbols, lcodes_huffman_min_bitlen =
2720 GetHuffmanForDecode(lcodes_huffman_bitlens, nlen-1, 15)
2721 --dynamic block code description: invalid literal/length code lengths,
2722 -- Incomplete code ok only for single length 1 code
2723 if (lcodes_err ~=0 and (lcodes_err < 0
2724 or nlen ~= (lcodes_huffman_bitlen_counts[0] or 0)
2725 +(lcodes_huffman_bitlen_counts[1] or 0))) then
2726 return -7
2727 end
2728
2729 local dcodes_err, dcodes_huffman_bitlen_counts
2730 , dcodes_huffman_symbols, dcodes_huffman_min_bitlen =
2731 GetHuffmanForDecode(dcodes_huffman_bitlens, ndist-1, 15)
2732 -- dynamic block code description: invalid distance code lengths,
2733 -- Incomplete code ok only for single length 1 code
2734 if (dcodes_err ~=0 and (dcodes_err < 0
2735 or ndist ~= (dcodes_huffman_bitlen_counts[0] or 0)
2736 + (dcodes_huffman_bitlen_counts[1] or 0))) then
2737 return -8
2738 end
2739
2740 -- Build buffman table for literal/length codes
2741 return DecodeUntilEndOfBlock(state, lcodes_huffman_bitlen_counts
2742 , lcodes_huffman_symbols, lcodes_huffman_min_bitlen
2743 , dcodes_huffman_bitlen_counts, dcodes_huffman_symbols
2744 , dcodes_huffman_min_bitlen)
2745end
2746
2747-- Decompress a deflate stream
2748-- @param state: a decompression state
2749-- @return the decompressed string if succeeds. nil if fails.
2750local function Inflate(state)
2751 local ReadBits = state.ReadBits
2752
2753 local is_last_block
2754 while not is_last_block do
2755 is_last_block = (ReadBits(1) == 1)
2756 local block_type = ReadBits(2)
2757 local status
2758 if block_type == 0 then
2759 status = DecompressStoreBlock(state)
2760 elseif block_type == 1 then
2761 status = DecompressFixBlock(state)
2762 elseif block_type == 2 then
2763 status = DecompressDynamicBlock(state)
2764 else
2765 return nil, -1 -- invalid block type (type == 3)
2766 end
2767 if status ~= 0 then
2768 return nil, status
2769 end
2770 -- ComputerCraft requires this for long-running processes
2771 if os and os.pullEvent then
2772 os.queueEvent("nosleep")
2773 os.pullEvent()
2774 end
2775 end
2776
2777 state.result_buffer[#state.result_buffer+1] =
2778 table_concat(state.buffer, "", 1, state.buffer_size)
2779 local result = table_concat(state.result_buffer)
2780 return result
2781end
2782
2783-- @see LibDeflate:DecompressDeflate(str)
2784-- @see LibDeflate:DecompressDeflateWithDict(str, dictionary)
2785local function DecompressDeflateInternal(str, dictionary)
2786 local state = CreateDecompressState(str, dictionary)
2787 local result, status = Inflate(state)
2788 if not result then
2789 return nil, status
2790 end
2791
2792 local bitlen_left = state.ReaderBitlenLeft()
2793 local bytelen_left = (bitlen_left - bitlen_left % 8) / 8
2794 return result, bytelen_left
2795end
2796
2797-- @see LibDeflate:DecompressZlib(str)
2798-- @see LibDeflate:DecompressZlibWithDict(str)
2799local function DecompressZlibInternal(str, dictionary)
2800 local state = CreateDecompressState(str, dictionary)
2801 local ReadBits = state.ReadBits
2802
2803 local CMF = ReadBits(8)
2804 if state.ReaderBitlenLeft() < 0 then
2805 return nil, 2 -- available inflate data did not terminate
2806 end
2807 local CM = CMF % 16
2808 local CINFO = (CMF - CM) / 16
2809 if CM ~= 8 then
2810 return nil, -12 -- invalid compression method
2811 end
2812 if CINFO > 7 then
2813 return nil, -13 -- invalid window size
2814 end
2815
2816 local FLG = ReadBits(8)
2817 if state.ReaderBitlenLeft() < 0 then
2818 return nil, 2 -- available inflate data did not terminate
2819 end
2820 if (CMF*256+FLG)%31 ~= 0 then
2821 return nil, -14 -- invalid header checksum
2822 end
2823
2824 local FDIST = ((FLG-FLG%32)/32 % 2)
2825 local FLEVEL = ((FLG-FLG%64)/64 % 4) -- luacheck: ignore FLEVEL
2826
2827 if FDIST == 1 then
2828 if not dictionary then
2829 return nil, -16 -- need dictonary, but dictionary is not provided.
2830 end
2831 local byte3 = ReadBits(8)
2832 local byte2 = ReadBits(8)
2833 local byte1 = ReadBits(8)
2834 local byte0 = ReadBits(8)
2835 local actual_adler32 = byte3*16777216+byte2*65536+byte1*256+byte0
2836 if state.ReaderBitlenLeft() < 0 then
2837 return nil, 2 -- available inflate data did not terminate
2838 end
2839 if not IsEqualAdler32(actual_adler32, dictionary.adler32) then
2840 return nil, -17 -- dictionary adler32 does not match
2841 end
2842 end
2843 local result, status = Inflate(state)
2844 if not result then
2845 return nil, status
2846 end
2847 state.SkipToByteBoundary()
2848
2849 local adler_byte0 = ReadBits(8)
2850 local adler_byte1 = ReadBits(8)
2851 local adler_byte2 = ReadBits(8)
2852 local adler_byte3 = ReadBits(8)
2853 if state.ReaderBitlenLeft() < 0 then
2854 return nil, 2 -- available inflate data did not terminate
2855 end
2856
2857 local adler32_expected = adler_byte0*16777216
2858 + adler_byte1*65536 + adler_byte2*256 + adler_byte3
2859 local adler32_actual = LibDeflate:Adler32(result)
2860 if not IsEqualAdler32(adler32_expected, adler32_actual) then
2861 return nil, -15 -- Adler32 checksum does not match
2862 end
2863
2864 local bitlen_left = state.ReaderBitlenLeft()
2865 local bytelen_left = (bitlen_left - bitlen_left % 8) / 8
2866 return result, bytelen_left
2867end
2868
2869--- Decompress a raw deflate compressed data.
2870-- @param str [string] The data to be decompressed.
2871-- @return [string/nil] If the decompression succeeds, return the decompressed
2872-- data. If the decompression fails, return nil. You should check if this return
2873-- value is non-nil to know if the decompression succeeds.
2874-- @return [integer] If the decompression succeeds, return the number of
2875-- unprocessed bytes in the input compressed data. This return value is a
2876-- positive integer if the input data is a valid compressed data appended by an
2877-- arbitary non-empty string. This return value is 0 if the input data does not
2878-- contain any extra bytes.<br>
2879-- If the decompression fails (The first return value of this function is nil),
2880-- this return value is undefined.
2881-- @see LibDeflate:CompressDeflate
2882function LibDeflate:DecompressDeflate(str)
2883 local arg_valid, arg_err = IsValidArguments(str)
2884 if not arg_valid then
2885 error(("Usage: LibDeflate:DecompressDeflate(str): "
2886 ..arg_err), 2)
2887 end
2888 return DecompressDeflateInternal(str)
2889end
2890
2891--- Decompress a raw deflate compressed data with a preset dictionary.
2892-- @param str [string] The data to be decompressed.
2893-- @param dictionary [table] The preset dictionary used by
2894-- LibDeflate:CompressDeflateWithDict when the compressed data is produced.
2895-- Decompression and compression must use the same dictionary.
2896-- Otherwise wrong decompressed data could be produced without generating any
2897-- error.
2898-- @return [string/nil] If the decompression succeeds, return the decompressed
2899-- data. If the decompression fails, return nil. You should check if this return
2900-- value is non-nil to know if the decompression succeeds.
2901-- @return [integer] If the decompression succeeds, return the number of
2902-- unprocessed bytes in the input compressed data. This return value is a
2903-- positive integer if the input data is a valid compressed data appended by an
2904-- arbitary non-empty string. This return value is 0 if the input data does not
2905-- contain any extra bytes.<br>
2906-- If the decompression fails (The first return value of this function is nil),
2907-- this return value is undefined.
2908-- @see LibDeflate:CompressDeflateWithDict
2909function LibDeflate:DecompressDeflateWithDict(str, dictionary)
2910 local arg_valid, arg_err = IsValidArguments(str, true, dictionary)
2911 if not arg_valid then
2912 error(("Usage: LibDeflate:DecompressDeflateWithDict(str, dictionary): "
2913 ..arg_err), 2)
2914 end
2915 return DecompressDeflateInternal(str, dictionary)
2916end
2917
2918--- Decompress a zlib compressed data.
2919-- @param str [string] The data to be decompressed
2920-- @return [string/nil] If the decompression succeeds, return the decompressed
2921-- data. If the decompression fails, return nil. You should check if this return
2922-- value is non-nil to know if the decompression succeeds.
2923-- @return [integer] If the decompression succeeds, return the number of
2924-- unprocessed bytes in the input compressed data. This return value is a
2925-- positive integer if the input data is a valid compressed data appended by an
2926-- arbitary non-empty string. This return value is 0 if the input data does not
2927-- contain any extra bytes.<br>
2928-- If the decompression fails (The first return value of this function is nil),
2929-- this return value is undefined.
2930-- @see LibDeflate:CompressZlib
2931function LibDeflate:DecompressZlib(str)
2932 local arg_valid, arg_err = IsValidArguments(str)
2933 if not arg_valid then
2934 error(("Usage: LibDeflate:DecompressZlib(str): "
2935 ..arg_err), 2)
2936 end
2937 return DecompressZlibInternal(str)
2938end
2939
2940--- Decompress a zlib compressed data with a preset dictionary.
2941-- @param str [string] The data to be decompressed
2942-- @param dictionary [table] The preset dictionary used by
2943-- LibDeflate:CompressDeflateWithDict when the compressed data is produced.
2944-- Decompression and compression must use the same dictionary.
2945-- Otherwise wrong decompressed data could be produced without generating any
2946-- error.
2947-- @return [string/nil] If the decompression succeeds, return the decompressed
2948-- data. If the decompression fails, return nil. You should check if this return
2949-- value is non-nil to know if the decompression succeeds.
2950-- @return [integer] If the decompression succeeds, return the number of
2951-- unprocessed bytes in the input compressed data. This return value is a
2952-- positive integer if the input data is a valid compressed data appended by an
2953-- arbitary non-empty string. This return value is 0 if the input data does not
2954-- contain any extra bytes.<br>
2955-- If the decompression fails (The first return value of this function is nil),
2956-- this return value is undefined.
2957-- @see LibDeflate:CompressZlibWithDict
2958function LibDeflate:DecompressZlibWithDict(str, dictionary)
2959 local arg_valid, arg_err = IsValidArguments(str, true, dictionary)
2960 if not arg_valid then
2961 error(("Usage: LibDeflate:DecompressZlibWithDict(str, dictionary): "
2962 ..arg_err), 2)
2963 end
2964 return DecompressZlibInternal(str, dictionary)
2965end
2966
2967-- Calculate the huffman code of fixed block
2968do
2969 _fix_block_literal_huffman_bitlen = {}
2970 for sym=0, 143 do
2971 _fix_block_literal_huffman_bitlen[sym] = 8
2972 end
2973 for sym=144, 255 do
2974 _fix_block_literal_huffman_bitlen[sym] = 9
2975 end
2976 for sym=256, 279 do
2977 _fix_block_literal_huffman_bitlen[sym] = 7
2978 end
2979 for sym=280, 287 do
2980 _fix_block_literal_huffman_bitlen[sym] = 8
2981 end
2982
2983 _fix_block_dist_huffman_bitlen = {}
2984 for dist=0, 31 do
2985 _fix_block_dist_huffman_bitlen[dist] = 5
2986 end
2987 local status
2988 status, _fix_block_literal_huffman_bitlen_count
2989 , _fix_block_literal_huffman_to_deflate_code =
2990 GetHuffmanForDecode(_fix_block_literal_huffman_bitlen, 287, 9)
2991 assert(status == 0)
2992 status, _fix_block_dist_huffman_bitlen_count,
2993 _fix_block_dist_huffman_to_deflate_code =
2994 GetHuffmanForDecode(_fix_block_dist_huffman_bitlen, 31, 5)
2995 assert(status == 0)
2996
2997 _fix_block_literal_huffman_code =
2998 GetHuffmanCodeFromBitlen(_fix_block_literal_huffman_bitlen_count
2999 , _fix_block_literal_huffman_bitlen, 287, 9)
3000 _fix_block_dist_huffman_code =
3001 GetHuffmanCodeFromBitlen(_fix_block_dist_huffman_bitlen_count
3002 , _fix_block_dist_huffman_bitlen, 31, 5)
3003end
3004
3005--- Decompress a gzip compressed data.
3006-- @param str [string] The data to be decompressed
3007-- @return [string/nil] If the decompression succeeds, return the decompressed
3008-- data. If the decompression fails, return nil. You should check if this return
3009-- value is non-nil to know if the decompression succeeds.
3010-- @return [integer] If the decompression succeeds, return the number of
3011-- unprocessed bytes in the input compressed data. This return value is a
3012-- positive integer if the input data is a valid compressed data appended by an
3013-- arbitary non-empty string. This return value is 0 if the input data does not
3014-- contain any extra bytes.<br>
3015-- If the decompression fails (The first return value of this function is nil),
3016-- this return value is undefined.
3017function LibDeflate:DecompressGzip(str)
3018 local arg_valid, arg_err = IsValidArguments(str)
3019 if not arg_valid then
3020 error(("Usage: LibDeflate:DecompressGzip(str): "..arg_err), 2)
3021 end
3022 if string_byte(string.sub(str, 1, 1)) ~= 31 or string_byte(string.sub(str, 2, 2)) ~= 139 then
3023 return nil, -1
3024 end
3025 if band(string_byte(string.sub(str, 4, 4)), 0xE0) ~= 0 then
3026 return nil, -3
3027 end
3028 if string_byte(string.sub(str, 3, 3)) ~= 8 then
3029 return nil, -4
3030 end
3031 local offset = 10
3032 if band(string_byte(string.sub(str, 4, 4)), 4) == 4 then
3033 offset = offset + string_byte(string.sub(str, 11, 11)) * 256 + string_byte(string.sub(str, 12, 12))
3034 end
3035 if band(string_byte(string.sub(str, 4, 4)), 8) == 8 then
3036 while string_byte(string.sub(str, offset, offset)) ~= 0 do offset = offset + 1 end
3037 end
3038 if band(string_byte(string.sub(str, 4, 4)), 16) == 16 then
3039 while string_byte(string.sub(str, offset, offset)) ~= 0 do offset = offset + 1 end
3040 end
3041 if band(string_byte(string.sub(str, 4, 4)), 2) == 2 then
3042 local src_checksum = string_byte(string.sub(str, offset + 1, offset + 1)) * 256 + string_byte(string.sub(str, offset, offset))
3043 local target_checksum = band(self:CRC32(string.sub(str, 1, offset - 1)), 0xFFFF)
3044 if xor(src_checksum, target_checksum) ~= 0xFFFF then return nil, -5 end
3045 offset = offset + 2
3046 end
3047 local res, err = DecompressDeflateInternal(string.sub(str, offset + 1, -8))
3048 if res == nil then return res, err end
3049 local src_checksum = string_byte(string.sub(str, -5, -5)) * 0x1000000 + string_byte(string.sub(str, -6, -6)) * 0x10000 + string_byte(string.sub(str, -7, -7)) * 256 + string_byte(string.sub(str, -8, -8))
3050 src_checksum = bnot(src_checksum)
3051 local target_checksum = self:CRC32(res)
3052 if xor(src_checksum, target_checksum) ~= 0xFFFFFFFF then return nil, -2 end
3053 return res
3054end
3055
3056-- For test. Don't use the functions in this table for real application.
3057-- Stuffs in this table is subject to change.
3058LibDeflate.internals = {
3059 LoadStringToTable = LoadStringToTable,
3060 IsValidDictionary = IsValidDictionary,
3061 IsEqualAdler32 = IsEqualAdler32
3062}
3063
3064return LibDeflate