· last year · Dec 23, 2023, 12:15 AM
1--[[--
2LibDeflate 1.0.0-release <br>
3Pure Lua compressor and decompressor with high compression ratio using
4DEFLATE/zlib format.
5
6@file LibDeflate.lua
7@author Haoqian He (Github: SafeteeWoW; World of Warcraft: Safetyy-Illidan(US))
8@copyright LibDeflate <2018> Haoqian He
9@license GNU General Public License Version 3 or later
10
11This library is implemented according to the following specifications. <br>
12Report a bug if LibDeflate is not fully compliant with those specs. <br>
13Both compressors and decompressors have been implemented in the library.<br>
141. RFC1950: DEFLATE Compressed Data Format Specification version 1.3 <br>
15https://tools.ietf.org/html/rfc1951 <br>
162. RFC1951: ZLIB Compressed Data Format Specification version 3.3 <br>
17https://tools.ietf.org/html/rfc1950 <br>
18
19This library requires Lua 5.1/5.2/5.3 interpreter or LuaJIT v2.0+. <br>
20This library does not have any dependencies. <br>
21<br>
22This file "LibDeflate.lua" is the only source file of
23the library. <br>
24Submit suggestions or report bugs to
25https://github.com/safeteeWow/LibDeflate/issues
26]]
27
28--[[
29This program is free software: you can redistribute it and/or modify
30it under the terms of the GNU General Public License as published by
31the Free Software Foundation, either version 3 of the License, or
32any later version.
33
34This program is distributed in the hope that it will be useful,
35but WITHOUT ANY WARRANTY; without even the implied warranty of
36MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
37GNU General Public License for more details.
38
39You should have received a copy of the GNU General Public License
40along with this program. If not, see https://www.gnu.org/licenses/.
41
42Credits:
431. zlib, by Jean-loup Gailly (compression) and Mark Adler (decompression).
44 http://www.zlib.net/
45 Licensed under zlib License. http://www.zlib.net/zlib_license.html
46 For the compression algorithm.
472. puff, by Mark Adler. https://github.com/madler/zlib/tree/master/contrib/puff
48 Licensed under zlib License. http://www.zlib.net/zlib_license.html
49 For the decompression algorithm.
503. LibCompress, by jjsheets and Galmok of European Stormrage (Horde)
51 https://www.wowace.com/projects/libcompress
52 Licensed under GPLv2.
53 https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
54 For the code to create customized codec.
554. WeakAuras2,
56 https://github.com/WeakAuras/WeakAuras2
57 Licensed under GPLv2.
58 For the 6bit encoding and decoding.
59]]
60
61--[[
62 Curseforge auto-packaging replacements:
63
64 Project Date: @project-date-iso@
65 Project Hash: @project-hash@
66 Project Version: @project-version@
67--]]
68
69local LibDeflate
70
71do
72 -- Semantic version. all lowercase.
73 -- Suffix can be alpha1, alpha2, beta1, beta2, rc1, rc2, etc.
74 -- NOTE: Two version numbers needs to modify.
75 -- 1. On the top of LibDeflate.lua
76 -- 2. HERE
77 local _VERSION = "1.0.0-release"
78
79 local _COPYRIGHT =
80 "LibDeflate ".._VERSION
81 .." Copyright (C) 2018 Haoqian He."
82 .." License GPLv3+: GNU GPL version 3 or later"
83
84 -- Register in the World of Warcraft library "LibStub" if detected.
85 if LibStub then
86 local MAJOR, MINOR = "LibDeflate", -1
87 -- When MAJOR is changed, I should name it as LibDeflate2
88 local lib, minor = LibStub:GetLibrary(MAJOR, true)
89 if lib and minor and minor >= MINOR then -- No need to update.
90 return lib
91 else -- Update or first time register
92 LibDeflate = LibStub:NewLibrary(MAJOR, _VERSION)
93 -- NOTE: It is important that new version has implemented
94 -- all exported APIs and tables in the old version,
95 -- so the old library is fully garbage collected,
96 -- and we 100% ensure the backward compatibility.
97 end
98 else -- "LibStub" is not detected.
99 LibDeflate = {}
100 end
101
102 LibDeflate._VERSION = _VERSION
103 LibDeflate._COPYRIGHT = _COPYRIGHT
104end
105
106-- localize Lua api for faster access.
107local assert = assert
108local error = error
109local pairs = pairs
110local string_byte = string.byte
111local string_char = string.char
112local string_find = string.find
113local string_gsub = string.gsub
114local string_sub = string.sub
115local table_concat = table.concat
116local table_sort = table.sort
117local tostring = tostring
118local type = type
119
120-- Converts i to 2^i, (0<=i<=32)
121-- This is used to implement bit left shift and bit right shift.
122-- "x >> y" in C: "(x-x%_pow2[y])/_pow2[y]" in Lua
123-- "x << y" in C: "x*_pow2[y]" in Lua
124local _pow2 = {}
125
126-- Converts any byte to a character, (0<=byte<=255)
127local _byte_to_char = {}
128
129-- _reverseBitsTbl[len][val] stores the bit reverse of
130-- the number with bit length "len" and value "val"
131-- For example, decimal number 6 with bits length 5 is binary 00110
132-- It's reverse is binary 01100,
133-- which is decimal 12 and 12 == _reverseBitsTbl[5][6]
134-- 1<=len<=9, 0<=val<=2^len-1
135-- The reason for 1<=len<=9 is that the max of min bitlen of huffman code
136-- of a huffman alphabet is 9?
137local _reverse_bits_tbl = {}
138
139-- Convert a LZ77 length (3<=len<=258) to
140-- a deflate literal/LZ77_length code (257<=code<=285)
141local _length_to_deflate_code = {}
142
143-- convert a LZ77 length (3<=len<=258) to
144-- a deflate literal/LZ77_length code extra bits.
145local _length_to_deflate_extra_bits = {}
146
147-- Convert a LZ77 length (3<=len<=258) to
148-- a deflate literal/LZ77_length code extra bit length.
149local _length_to_deflate_extra_bitlen = {}
150
151-- Convert a small LZ77 distance (1<=dist<=256) to a deflate code.
152local _dist256_to_deflate_code = {}
153
154-- Convert a small LZ77 distance (1<=dist<=256) to
155-- a deflate distance code extra bits.
156local _dist256_to_deflate_extra_bits = {}
157
158-- Convert a small LZ77 distance (1<=dist<=256) to
159-- a deflate distance code extra bit length.
160local _dist256_to_deflate_extra_bitlen = {}
161
162-- Convert a literal/LZ77_length deflate code to LZ77 base length
163-- The key of the table is (code - 256), 257<=code<=285
164local _literal_deflate_code_to_base_len = {
165 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
166 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258,
167}
168
169-- Convert a literal/LZ77_length deflate code to base LZ77 length extra bits
170-- The key of the table is (code - 256), 257<=code<=285
171local _literal_deflate_code_to_extra_bitlen = {
172 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
173 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0,
174}
175
176-- Convert a distance deflate code to base LZ77 distance. (0<=code<=29)
177local _dist_deflate_code_to_base_dist = {
178 [0] = 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
179 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
180 8193, 12289, 16385, 24577,
181}
182
183-- Convert a distance deflate code to LZ77 bits length. (0<=code<=29)
184local _dist_deflate_code_to_extra_bitlen = {
185 [0] = 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
186 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
187}
188
189-- The code order of the first huffman header in the dynamic deflate block.
190-- See the page 12 of RFC1951
191local _rle_codes_huffman_bitlen_order = {16, 17, 18,
192 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15,
193}
194
195-- The following tables are used by fixed deflate block.
196-- The value of these tables are assigned at the bottom of the source.
197
198-- The huffman code of the literal/LZ77_length deflate codes,
199-- in fixed deflate block.
200local _fix_block_literal_huffman_code
201
202-- Convert huffman code of the literal/LZ77_length to deflate codes,
203-- in fixed deflate block.
204local _fix_block_literal_huffman_to_deflate_code
205
206-- The bit length of the huffman code of literal/LZ77_length deflate codes,
207-- in fixed deflate block.
208local _fix_block_literal_huffman_bitlen
209
210-- The count of each bit length of the literal/LZ77_length deflate codes,
211-- in fixed deflate block.
212local _fix_block_literal_huffman_bitlen_count
213
214-- The huffman code of the distance deflate codes,
215-- in fixed deflate block.
216local _fix_block_dist_huffman_code
217
218-- Convert huffman code of the distance to deflate codes,
219-- in fixed deflate block.
220local _fix_block_dist_huffman_to_deflate_code
221
222-- The bit length of the huffman code of the distance deflate codes,
223-- in fixed deflate block.
224local _fix_block_dist_huffman_bitlen
225
226-- The count of each bit length of the huffman code of
227-- the distance deflate codes,
228-- in fixed deflate block.
229local _fix_block_dist_huffman_bitlen_count
230
231for i = 0, 255 do
232 _byte_to_char[i] = string_char(i)
233end
234
235do
236 local pow = 1
237 for i = 0, 32 do
238 _pow2[i] = pow
239 pow = pow * 2
240 end
241end
242
243for i = 1, 9 do
244 _reverse_bits_tbl[i] = {}
245 for j=0, _pow2[i+1]-1 do
246 local reverse = 0
247 local value = j
248 for _ = 1, i do
249 -- The following line is equivalent to "res | (code %2)" in C.
250 reverse = reverse - reverse%2
251 + (((reverse%2==1) or (value % 2) == 1) and 1 or 0)
252 value = (value-value%2)/2
253 reverse = reverse * 2
254 end
255 _reverse_bits_tbl[i][j] = (reverse-reverse%2)/2
256 end
257end
258
259-- The source code is written according to the pattern in the numbers
260-- in RFC1951 Page10.
261do
262 local a = 18
263 local b = 16
264 local c = 265
265 local bitlen = 1
266 for len = 3, 258 do
267 if len <= 10 then
268 _length_to_deflate_code[len] = len + 254
269 _length_to_deflate_extra_bitlen[len] = 0
270 elseif len == 258 then
271 _length_to_deflate_code[len] = 285
272 _length_to_deflate_extra_bitlen[len] = 0
273 else
274 if len > a then
275 a = a + b
276 b = b * 2
277 c = c + 4
278 bitlen = bitlen + 1
279 end
280 local t = len-a-1+b/2
281 _length_to_deflate_code[len] = (t-(t%(b/8)))/(b/8) + c
282 _length_to_deflate_extra_bitlen[len] = bitlen
283 _length_to_deflate_extra_bits[len] = t % (b/8)
284 end
285 end
286end
287
288-- The source code is written according to the pattern in the numbers
289-- in RFC1951 Page11.
290do
291 _dist256_to_deflate_code[1] = 0
292 _dist256_to_deflate_code[2] = 1
293 _dist256_to_deflate_extra_bitlen[1] = 0
294 _dist256_to_deflate_extra_bitlen[2] = 0
295
296 local a = 3
297 local b = 4
298 local code = 2
299 local bitlen = 0
300 for dist = 3, 256 do
301 if dist > b then
302 a = a * 2
303 b = b * 2
304 code = code + 2
305 bitlen = bitlen + 1
306 end
307 _dist256_to_deflate_code[dist] = (dist <= a) and code or (code+1)
308 _dist256_to_deflate_extra_bitlen[dist] = (bitlen < 0) and 0 or bitlen
309 if b >= 8 then
310 _dist256_to_deflate_extra_bits[dist] = (dist-b/2-1) % (b/4)
311 end
312 end
313end
314
315-- CRC-16/CRC-32 computation
316local band, bnot, xor, lshift, rshift
317
318if bit ~= nil then
319 band = bit.band
320 bnot = bit.bnot
321 xor = bit.bxor
322 lshift = bit.blshift
323 rshift = bit.blogic_rshift
324elseif bit32 ~= nil then
325 band = bit32.band
326 bnot = bit32.bnot
327 xor = bit32.bxor
328 lshift = bit32.lshift
329 rshift = bit32.rshift
330else
331 xor = function(a, b)
332 local calc = 0
333
334 for i = 32, 0, -1 do
335 local val = 2 ^ i
336 local aa = false
337 local bb = false
338
339 if a == 0 then
340 calc = calc + b
341 break
342 end
343
344 if b == 0 then
345 calc = calc + a
346 break
347 end
348
349 if a >= val then
350 aa = true
351 a = a - val
352 end
353
354 if b >= val then
355 bb = true
356 b = b - val
357 end
358
359 if not (aa and bb) and (aa or bb) then
360 calc = calc + val
361 end
362 end
363
364 return calc
365 end
366
367 lshift = function(num, left)
368 local res = num * (2 ^ left)
369 return res % (2 ^ 32)
370 end
371
372 rshift = function(num, right)
373 local res = num / (2 ^ right)
374 return math.floor(res)
375 end
376
377 band = function(a, b)
378 local p,c=1,0
379 while a>0 and b>0 do
380 local ra,rb=a%2,b%2
381 if ra+rb>1 then c=c+p end
382 a,b,p=(a-ra)/2,(b-rb)/2,p*2
383 end
384 return c
385 end
386
387 bnot = function(x)
388 return bxor(x, (2^(bits or floor(log(x, 2))))-1)
389 end
390end
391
392-- CRC-32-IEEE 802.3 (V.42)
393local POLY = 0xEDB88320
394
395-- Memoize function pattern (like http://lua-users.org/wiki/FuncTables ).
396local function memoize(f)
397 local mt = {}
398 local t = setmetatable({}, mt)
399 function mt:__index(k)
400 local v = f(k); t[k] = v
401 return v
402 end
403 return t
404end
405
406-- CRC table.
407local crc_table = memoize(function(i)
408 local crc = i
409 for _=1,8 do
410 local b = band(crc, 1)
411 crc = rshift(crc, 1)
412 if b == 1 then crc = xor(crc, POLY) end
413 end
414 return crc
415end)
416
417
418local function crc32_byte(byte, crc)
419 crc = bnot(crc or 0)
420 local v1 = rshift(crc, 8)
421 local v2 = crc_table[xor(crc % 256, byte)]
422 return bnot(xor(v1, v2))
423end
424
425
426local function crc32_string(s, crc)
427 crc = crc or 0
428 for i=1,#s do
429 crc = crc32_byte(s:byte(i), crc)
430 end
431 return crc
432end
433
434--- Calculate the CRC-32 checksum of the string.
435-- @param s [string] the input string to calculate its CRC-32 checksum.
436-- @return [integer] The CRC-32 checksum, which is greater or equal to 0,
437-- and less than 2^32 (4294967296).
438function LibDeflate:CRC32(s, crc)
439 if type(s) == 'string' then
440 return crc32_string(s, crc)
441 else
442 return crc32_byte(s, crc)
443 end
444end
445
446--- Calculate the Adler-32 checksum of the string. <br>
447-- See RFC1950 Page 9 https://tools.ietf.org/html/rfc1950 for the
448-- definition of Adler-32 checksum.
449-- @param str [string] the input string to calcuate its Adler-32 checksum.
450-- @return [integer] The Adler-32 checksum, which is greater or equal to 0,
451-- and less than 2^32 (4294967296).
452function LibDeflate:Adler32(str)
453 -- This function is loop unrolled by better performance.
454 --
455 -- Here is the minimum code:
456 --
457 -- local a = 1
458 -- local b = 0
459 -- for i=1, #str do
460 -- local s = string.byte(str, i, i)
461 -- a = (a+s)%65521
462 -- b = (b+a)%65521
463 -- end
464 -- return b*65536+a
465 if type(str) ~= "string" then
466 error(("Usage: LibDeflate:Adler32(str):"
467 .." 'str' - string expected got '%s'."):format(type(str)), 2)
468 end
469 local strlen = #str
470
471 local i = 1
472 local a = 1
473 local b = 0
474 while i <= strlen - 15 do
475 local x1, x2, x3, x4, x5, x6, x7, x8,
476 x9, x10, x11, x12, x13, x14, x15, x16 = string_byte(str, i, i+15)
477 b = (b+16*a+16*x1+15*x2+14*x3+13*x4+12*x5+11*x6+10*x7+9*x8+8*x9
478 +7*x10+6*x11+5*x12+4*x13+3*x14+2*x15+x16)%65521
479 a = (a+x1+x2+x3+x4+x5+x6+x7+x8+x9+x10+x11+x12+x13+x14+x15+x16)%65521
480 i = i + 16
481 end
482 while (i <= strlen) do
483 local x = string_byte(str, i, i)
484 a = (a + x) % 65521
485 b = (b + a) % 65521
486 i = i + 1
487 end
488 return (b*65536+a) % 4294967296
489end
490
491-- Compare adler32 checksum.
492-- adler32 should be compared with a mod to avoid sign problem
493-- 4072834167 (unsigned) is the same adler32 as -222133129
494local function IsEqualAdler32(actual, expected)
495 return (actual % 4294967296) == (expected % 4294967296)
496end
497
498--- Create a preset dictionary.
499--
500-- This function is not fast, and the memory consumption of the produced
501-- dictionary is about 50 times of the input string. Therefore, it is suggestted
502-- to run this function only once in your program.
503--
504-- It is very important to know that if you do use a preset dictionary,
505-- compressors and decompressors MUST USE THE SAME dictionary. That is,
506-- dictionary must be created using the same string. If you update your program
507-- with a new dictionary, people with the old version won't be able to transmit
508-- data with people with the new version. Therefore, changing the dictionary
509-- must be very careful.
510--
511-- The parameters "strlen" and "adler32" add a layer of verification to ensure
512-- the parameter "str" is not modified unintentionally during the program
513-- development.
514--
515-- @usage local dict_str = "1234567890"
516--
517-- -- print(dict_str:len(), LibDeflate:Adler32(dict_str))
518-- -- Hardcode the print result below to verify it to avoid acciently
519-- -- modification of 'str' during the program development.
520-- -- string length: 10, Adler-32: 187433486,
521-- -- Don't calculate string length and its Adler-32 at run-time.
522--
523-- local dict = LibDeflate:CreateDictionary(dict_str, 10, 187433486)
524--
525-- @param str [string] The string used as the preset dictionary. <br>
526-- You should put stuffs that frequently appears in the dictionary
527-- string and preferablely put more frequently appeared stuffs toward the end
528-- of the string. <br>
529-- Empty string and string longer than 32768 bytes are not allowed.
530-- @param strlen [integer] The length of 'str'. Please pass in this parameter
531-- as a hardcoded constant, in order to verify the content of 'str'. The value
532-- of this parameter should be known before your program runs.
533-- @param adler32 [integer] The Adler-32 checksum of 'str'. Please pass in this
534-- parameter as a hardcoded constant, in order to verify the content of 'str'.
535-- The value of this parameter should be known before your program runs.
536-- @return [table] The dictionary used for preset dictionary compression and
537-- decompression.
538-- @raise error if 'strlen' does not match the length of 'str',
539-- or if 'adler32' does not match the Adler-32 checksum of 'str'.
540function LibDeflate:CreateDictionary(str, strlen, adler32)
541 if type(str) ~= "string" then
542 error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
543 .." 'str' - string expected got '%s'."):format(type(str)), 2)
544 end
545 if type(strlen) ~= "number" then
546 error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
547 .." 'strlen' - number expected got '%s'."):format(
548 type(strlen)), 2)
549 end
550 if type(adler32) ~= "number" then
551 error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
552 .." 'adler32' - number expected got '%s'."):format(
553 type(adler32)), 2)
554 end
555 if strlen ~= #str then
556 error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
557 .." 'strlen' does not match the actual length of 'str'."
558 .." 'strlen': %u, '#str': %u ."
559 .." Please check if 'str' is modified unintentionally.")
560 :format(strlen, #str))
561 end
562 if strlen == 0 then
563 error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
564 .." 'str' - Empty string is not allowed."), 2)
565 end
566 if strlen > 32768 then
567 error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
568 .." 'str' - string longer than 32768 bytes is not allowed."
569 .." Got %d bytes."):format(strlen), 2)
570 end
571 local actual_adler32 = self:Adler32(str)
572 if not IsEqualAdler32(adler32, actual_adler32) then
573 error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
574 .." 'adler32' does not match the actual adler32 of 'str'."
575 .." 'adler32': %u, 'Adler32(str)': %u ."
576 .." Please check if 'str' is modified unintentionally.")
577 :format(adler32, actual_adler32))
578 end
579
580 local dictionary = {}
581 dictionary.adler32 = adler32
582 dictionary.hash_tables = {}
583 dictionary.string_table = {}
584 dictionary.strlen = strlen
585 local string_table = dictionary.string_table
586 local hash_tables = dictionary.hash_tables
587 string_table[1] = string_byte(str, 1, 1)
588 string_table[2] = string_byte(str, 2, 2)
589 if strlen >= 3 then
590 local i = 1
591 local hash = string_table[1]*256+string_table[2]
592 while i <= strlen - 2 - 3 do
593 local x1, x2, x3, x4 = string_byte(str, i+2, i+5)
594 string_table[i+2] = x1
595 string_table[i+3] = x2
596 string_table[i+4] = x3
597 string_table[i+5] = x4
598 hash = (hash*256+x1)%16777216
599 local t = hash_tables[hash]
600 if not t then t = {}; hash_tables[hash] = t end
601 t[#t+1] = i-strlen
602 i = i + 1
603 hash = (hash*256+x2)%16777216
604 t = hash_tables[hash]
605 if not t then t = {}; hash_tables[hash] = t end
606 t[#t+1] = i-strlen
607 i = i + 1
608 hash = (hash*256+x3)%16777216
609 t = hash_tables[hash]
610 if not t then t = {}; hash_tables[hash] = t end
611 t[#t+1] = i-strlen
612 i = i + 1
613 hash = (hash*256+x4)%16777216
614 t = hash_tables[hash]
615 if not t then t = {}; hash_tables[hash] = t end
616 t[#t+1] = i-strlen
617 i = i + 1
618 end
619 while i <= strlen - 2 do
620 local x = string_byte(str, i+2)
621 string_table[i+2] = x
622 hash = (hash*256+x)%16777216
623 local t = hash_tables[hash]
624 if not t then t = {}; hash_tables[hash] = t end
625 t[#t+1] = i-strlen
626 i = i + 1
627 end
628 end
629 return dictionary
630end
631
632-- Check if the dictionary is valid.
633-- @param dictionary The preset dictionary for compression and decompression.
634-- @return true if valid, false if not valid.
635-- @return if not valid, the error message.
636local function IsValidDictionary(dictionary)
637 if type(dictionary) ~= "table" then
638 return false, ("'dictionary' - table expected got '%s'.")
639 :format(type(dictionary))
640 end
641 if type(dictionary.adler32) ~= "number"
642 or type(dictionary.string_table) ~= "table"
643 or type(dictionary.strlen) ~= "number"
644 or dictionary.strlen <= 0
645 or dictionary.strlen > 32768
646 or dictionary.strlen ~= #dictionary.string_table
647 or type(dictionary.hash_tables) ~= "table"
648 then
649 return false, ("'dictionary' - corrupted dictionary.")
650 :format(type(dictionary))
651 end
652 return true, ""
653end
654
655--[[
656 key of the configuration table is the compression level,
657 and its value stores the compression setting.
658 These numbers come from zlib source code.
659
660 Higher compression level usually means better compression.
661 (Because LibDeflate uses a simplified version of zlib algorithm,
662 there is no guarantee that higher compression level does not create
663 bigger file than lower level, but I can say it's 99% likely)
664
665 Be careful with the high compression level. This is a pure lua
666 implementation compressor/decompressor, which is significant slower than
667 a C/C++ equivalant compressor/decompressor. Very high compression level
668 costs significant more CPU time, and usually compression size won't be
669 significant smaller when you increase compression level by 1, when the
670 level is already very high. Benchmark yourself if you can afford it.
671
672 See also https://github.com/madler/zlib/blob/master/doc/algorithm.txt,
673 https://github.com/madler/zlib/blob/master/deflate.c for more information.
674
675 The meaning of each field:
676 @field 1 use_lazy_evaluation:
677 true/false. Whether the program uses lazy evaluation.
678 See what is "lazy evaluation" in the link above.
679 lazy_evaluation improves ratio, but relatively slow.
680 @field 2 good_prev_length:
681 Only effective if lazy is set, Only use 1/4 of max_chain,
682 if prev length of lazy match is above this.
683 @field 3 max_insert_length/max_lazy_match:
684 If not using lazy evaluation,
685 insert new strings in the hash table only if the match length is not
686 greater than this length.
687 If using lazy evaluation, only continue lazy evaluation,
688 if previous match length is strictly smaller than this value.
689 @field 4 nice_length:
690 Number. Don't continue to go down the hash chain,
691 if match length is above this.
692 @field 5 max_chain:
693 Number. The maximum number of hash chains we look.
694--]]
695local _compression_level_configs = {
696 [0] = {false, nil, 0, 0, 0}, -- level 0, no compression
697 [1] = {false, nil, 4, 8, 4}, -- level 1, similar to zlib level 1
698 [2] = {false, nil, 5, 18, 8}, -- level 2, similar to zlib level 2
699 [3] = {false, nil, 6, 32, 32}, -- level 3, similar to zlib level 3
700 [4] = {true, 4, 4, 16, 16}, -- level 4, similar to zlib level 4
701 [5] = {true, 8, 16, 32, 32}, -- level 5, similar to zlib level 5
702 [6] = {true, 8, 16, 128, 128}, -- level 6, similar to zlib level 6
703 [7] = {true, 8, 32, 128, 256}, -- (SLOW) level 7, similar to zlib level 7
704 [8] = {true, 32, 128, 258, 1024} , --(SLOW) level 8,similar to zlib level 8
705 [9] = {true, 32, 258, 258, 4096},
706 -- (VERY SLOW) level 9, similar to zlib level 9
707}
708
709-- Check if the compression/decompression arguments is valid
710-- @param str The input string.
711-- @param check_dictionary if true, check if dictionary is valid.
712-- @param dictionary The preset dictionary for compression and decompression.
713-- @param check_configs if true, check if config is valid.
714-- @param configs The compression configuration table
715-- @return true if valid, false if not valid.
716-- @return if not valid, the error message.
717local function IsValidArguments(str,
718 check_dictionary, dictionary,
719 check_configs, configs)
720
721 if type(str) ~= "string" then
722 return false,
723 ("'str' - string expected got '%s'."):format(type(str))
724 end
725 if check_dictionary then
726 local dict_valid, dict_err = IsValidDictionary(dictionary)
727 if not dict_valid then
728 return false, dict_err
729 end
730 end
731 if check_configs then
732 local type_configs = type(configs)
733 if type_configs ~= "nil" and type_configs ~= "table" then
734 return false,
735 ("'configs' - nil or table expected got '%s'.")
736 :format(type(configs))
737 end
738 if type_configs == "table" then
739 for k, v in pairs(configs) do
740 if k ~= "level" and k ~= "strategy" then
741 return false,
742 ("'configs' - unsupported table key in the configs: '%s'.")
743 :format(k)
744 elseif k == "level" and not _compression_level_configs[v] then
745 return false,
746 ("'configs' - unsupported 'level': %s."):format(tostring(v))
747 elseif k == "strategy" and v ~= "fixed" and v ~= "huffman_only"
748 and v ~= "dynamic" then
749 -- random_block_type is for testing purpose
750 return false, ("'configs' - unsupported 'strategy': '%s'.")
751 :format(tostring(v))
752 end
753 end
754 end
755 end
756 return true, ""
757end
758
759
760
761--[[ --------------------------------------------------------------------------
762 Compress code
763--]] --------------------------------------------------------------------------
764
765-- partial flush to save memory
766local _FLUSH_MODE_MEMORY_CLEANUP = 0
767-- full flush with partial bytes
768local _FLUSH_MODE_OUTPUT = 1
769-- write bytes to get to byte boundary
770local _FLUSH_MODE_BYTE_BOUNDARY = 2
771-- no flush, just get num of bits written so far
772local _FLUSH_MODE_NO_FLUSH = 3
773
774--[[
775 Create an empty writer to easily write stuffs as the unit of bits.
776 Return values:
777 1. WriteBits(code, bitlen):
778 2. WriteString(str):
779 3. Flush(mode):
780--]]
781local function CreateWriter()
782 local buffer_size = 0
783 local cache = 0
784 local cache_bitlen = 0
785 local total_bitlen = 0
786 local buffer = {}
787 -- When buffer is big enough, flush into result_buffer to save memory.
788 local result_buffer = {}
789
790 -- Write bits with value "value" and bit length of "bitlen" into writer.
791 -- @param value: The value being written
792 -- @param bitlen: The bit length of "value"
793 -- @return nil
794 local function WriteBits(value, bitlen)
795 cache = cache + value * _pow2[cache_bitlen]
796 cache_bitlen = cache_bitlen + bitlen
797 total_bitlen = total_bitlen + bitlen
798 -- Only bulk to buffer every 4 bytes. This is quicker.
799 if cache_bitlen >= 32 then
800 buffer_size = buffer_size + 1
801 buffer[buffer_size] =
802 _byte_to_char[cache % 256]
803 .._byte_to_char[((cache-cache%256)/256 % 256)]
804 .._byte_to_char[((cache-cache%65536)/65536 % 256)]
805 .._byte_to_char[((cache-cache%16777216)/16777216 % 256)]
806 local rshift_mask = _pow2[32 - cache_bitlen + bitlen]
807 cache = (value - value%rshift_mask)/rshift_mask
808 cache_bitlen = cache_bitlen - 32
809 end
810 end
811
812 -- Write the entire string into the writer.
813 -- @param str The string being written
814 -- @return nil
815 local function WriteString(str)
816 for _ = 1, cache_bitlen, 8 do
817 buffer_size = buffer_size + 1
818 buffer[buffer_size] = string_char(cache % 256)
819 cache = (cache-cache%256)/256
820 end
821 cache_bitlen = 0
822 buffer_size = buffer_size + 1
823 buffer[buffer_size] = str
824 total_bitlen = total_bitlen + #str*8
825 end
826
827 -- Flush current stuffs in the writer and return it.
828 -- This operation will free most of the memory.
829 -- @param mode See the descrtion of the constant and the source code.
830 -- @return The total number of bits stored in the writer right now.
831 -- for byte boundary mode, it includes the padding bits.
832 -- for output mode, it does not include padding bits.
833 -- @return Return the outputs if mode is output.
834 local function FlushWriter(mode)
835 if mode == _FLUSH_MODE_NO_FLUSH then
836 return total_bitlen
837 end
838
839 if mode == _FLUSH_MODE_OUTPUT
840 or mode == _FLUSH_MODE_BYTE_BOUNDARY then
841 -- Full flush, also output cache.
842 -- Need to pad some bits if cache_bitlen is not multiple of 8.
843 local padding_bitlen = (8 - cache_bitlen % 8) % 8
844
845 if cache_bitlen > 0 then
846 -- padding with all 1 bits, mainly because "\000" is not
847 -- good to be tranmitted. I do this so "\000" is a little bit
848 -- less frequent.
849 cache = cache - _pow2[cache_bitlen]
850 + _pow2[cache_bitlen+padding_bitlen]
851 for _ = 1, cache_bitlen, 8 do
852 buffer_size = buffer_size + 1
853 buffer[buffer_size] = _byte_to_char[cache % 256]
854 cache = (cache-cache%256)/256
855 end
856
857 cache = 0
858 cache_bitlen = 0
859 end
860 if mode == _FLUSH_MODE_BYTE_BOUNDARY then
861 total_bitlen = total_bitlen + padding_bitlen
862 return total_bitlen
863 end
864 end
865
866 local flushed = table_concat(buffer)
867 buffer = {}
868 buffer_size = 0
869 result_buffer[#result_buffer+1] = flushed
870
871 if mode == _FLUSH_MODE_MEMORY_CLEANUP then
872 return total_bitlen
873 else
874 return total_bitlen, table_concat(result_buffer)
875 end
876 end
877
878 return WriteBits, WriteString, FlushWriter
879end
880
881-- Push an element into a max heap
882-- @param heap A max heap whose max element is at index 1.
883-- @param e The element to be pushed. Assume element "e" is a table
884-- and comparison is done via its first entry e[1]
885-- @param heap_size current number of elements in the heap.
886-- NOTE: There may be some garbage stored in
887-- heap[heap_size+1], heap[heap_size+2], etc..
888-- @return nil
889local function MinHeapPush(heap, e, heap_size)
890 heap_size = heap_size + 1
891 heap[heap_size] = e
892 local value = e[1]
893 local pos = heap_size
894 local parent_pos = (pos-pos%2)/2
895
896 while (parent_pos >= 1 and heap[parent_pos][1] > value) do
897 local t = heap[parent_pos]
898 heap[parent_pos] = e
899 heap[pos] = t
900 pos = parent_pos
901 parent_pos = (parent_pos-parent_pos%2)/2
902 end
903end
904
905-- Pop an element from a max heap
906-- @param heap A max heap whose max element is at index 1.
907-- @param heap_size current number of elements in the heap.
908-- @return the poped element
909-- Note: This function does not change table size of "heap" to save CPU time.
910local function MinHeapPop(heap, heap_size)
911 local top = heap[1]
912 local e = heap[heap_size]
913 local value = e[1]
914 heap[1] = e
915 heap[heap_size] = top
916 heap_size = heap_size - 1
917
918 local pos = 1
919 local left_child_pos = pos * 2
920 local right_child_pos = left_child_pos + 1
921
922 while (left_child_pos <= heap_size) do
923 local left_child = heap[left_child_pos]
924 if (right_child_pos <= heap_size
925 and heap[right_child_pos][1] < left_child[1]) then
926 local right_child = heap[right_child_pos]
927 if right_child[1] < value then
928 heap[right_child_pos] = e
929 heap[pos] = right_child
930 pos = right_child_pos
931 left_child_pos = pos * 2
932 right_child_pos = left_child_pos + 1
933 else
934 break
935 end
936 else
937 if left_child[1] < value then
938 heap[left_child_pos] = e
939 heap[pos] = left_child
940 pos = left_child_pos
941 left_child_pos = pos * 2
942 right_child_pos = left_child_pos + 1
943 else
944 break
945 end
946 end
947 end
948
949 return top
950end
951
952-- Deflate defines a special huffman tree, which is unique once the bit length
953-- of huffman code of all symbols are known.
954-- @param bitlen_count Number of symbols with a specific bitlen
955-- @param symbol_bitlen The bit length of a symbol
956-- @param max_symbol The max symbol among all symbols,
957-- which is (number of symbols - 1)
958-- @param max_bitlen The max huffman bit length among all symbols.
959-- @return The huffman code of all symbols.
960local function GetHuffmanCodeFromBitlen(bitlen_counts, symbol_bitlens
961 , max_symbol, max_bitlen)
962 local huffman_code = 0
963 local next_codes = {}
964 local symbol_huffman_codes = {}
965 for bitlen = 1, max_bitlen do
966 huffman_code = (huffman_code+(bitlen_counts[bitlen-1] or 0))*2
967 next_codes[bitlen] = huffman_code
968 end
969 for symbol = 0, max_symbol do
970 local bitlen = symbol_bitlens[symbol]
971 if bitlen then
972 huffman_code = next_codes[bitlen]
973 next_codes[bitlen] = huffman_code + 1
974
975 -- Reverse the bits of huffman code,
976 -- because most signifant bits of huffman code
977 -- is stored first into the compressed data.
978 -- @see RFC1951 Page5 Section 3.1.1
979 if bitlen <= 9 then -- Have cached reverse for small bitlen.
980 symbol_huffman_codes[symbol] =
981 _reverse_bits_tbl[bitlen][huffman_code]
982 else
983 local reverse = 0
984 for _ = 1, bitlen do
985 reverse = reverse - reverse%2
986 + (((reverse%2==1)
987 or (huffman_code % 2) == 1) and 1 or 0)
988 huffman_code = (huffman_code-huffman_code%2)/2
989 reverse = reverse*2
990 end
991 symbol_huffman_codes[symbol] = (reverse-reverse%2)/2
992 end
993 end
994 end
995 return symbol_huffman_codes
996end
997
998-- A helper function to sort heap elements
999-- a[1], b[1] is the huffman frequency
1000-- a[2], b[2] is the symbol value.
1001local function SortByFirstThenSecond(a, b)
1002 return a[1] < b[1] or
1003 (a[1] == b[1] and a[2] < b[2])
1004end
1005
1006-- Calculate the huffman bit length and huffman code.
1007-- @param symbol_count: A table whose table key is the symbol, and table value
1008-- is the symbol frenquency (nil means 0 frequency).
1009-- @param max_bitlen: See description of return value.
1010-- @param max_symbol: The maximum symbol
1011-- @return a table whose key is the symbol, and the value is the huffman bit
1012-- bit length. We guarantee that all bit length <= max_bitlen.
1013-- For 0<=symbol<=max_symbol, table value could be nil if the frequency
1014-- of the symbol is 0 or nil.
1015-- @return a table whose key is the symbol, and the value is the huffman code.
1016-- @return a number indicating the maximum symbol whose bitlen is not 0.
1017local function GetHuffmanBitlenAndCode(symbol_counts, max_bitlen, max_symbol)
1018 local heap_size
1019 local max_non_zero_bitlen_symbol = -1
1020 local leafs = {}
1021 local heap = {}
1022 local symbol_bitlens = {}
1023 local symbol_codes = {}
1024 local bitlen_counts = {}
1025
1026 --[[
1027 tree[1]: weight, temporarily used as parent and bitLengths
1028 tree[2]: symbol
1029 tree[3]: left child
1030 tree[4]: right child
1031 --]]
1032 local number_unique_symbols = 0
1033 for symbol, count in pairs(symbol_counts) do
1034 number_unique_symbols = number_unique_symbols + 1
1035 leafs[number_unique_symbols] = {count, symbol}
1036 end
1037
1038 if (number_unique_symbols == 0) then
1039 -- no code.
1040 return {}, {}, -1
1041 elseif (number_unique_symbols == 1) then
1042 -- Only one code. In this case, its huffman code
1043 -- needs to be assigned as 0, and bit length is 1.
1044 -- This is the only case that the return result
1045 -- represents an imcomplete huffman tree.
1046 local symbol = leafs[1][2]
1047 symbol_bitlens[symbol] = 1
1048 symbol_codes[symbol] = 0
1049 return symbol_bitlens, symbol_codes, symbol
1050 else
1051 table_sort(leafs, SortByFirstThenSecond)
1052 heap_size = number_unique_symbols
1053 for i = 1, heap_size do
1054 heap[i] = leafs[i]
1055 end
1056
1057 while (heap_size > 1) do
1058 -- Note: pop does not change table size of heap
1059 local leftChild = MinHeapPop(heap, heap_size)
1060 heap_size = heap_size - 1
1061 local rightChild = MinHeapPop(heap, heap_size)
1062 heap_size = heap_size - 1
1063 local newNode =
1064 {leftChild[1]+rightChild[1], -1, leftChild, rightChild}
1065 MinHeapPush(heap, newNode, heap_size)
1066 heap_size = heap_size + 1
1067 end
1068
1069 -- Number of leafs whose bit length is greater than max_len.
1070 local number_bitlen_overflow = 0
1071
1072 -- Calculate bit length of all nodes
1073 local fifo = {heap[1], 0, 0, 0} -- preallocate some spaces.
1074 local fifo_size = 1
1075 local index = 1
1076 heap[1][1] = 0
1077 while (index <= fifo_size) do -- Breath first search
1078 local e = fifo[index]
1079 local bitlen = e[1]
1080 local symbol = e[2]
1081 local left_child = e[3]
1082 local right_child = e[4]
1083 if left_child then
1084 fifo_size = fifo_size + 1
1085 fifo[fifo_size] = left_child
1086 left_child[1] = bitlen + 1
1087 end
1088 if right_child then
1089 fifo_size = fifo_size + 1
1090 fifo[fifo_size] = right_child
1091 right_child[1] = bitlen + 1
1092 end
1093 index = index + 1
1094
1095 if (bitlen > max_bitlen) then
1096 number_bitlen_overflow = number_bitlen_overflow + 1
1097 bitlen = max_bitlen
1098 end
1099 if symbol >= 0 then
1100 symbol_bitlens[symbol] = bitlen
1101 max_non_zero_bitlen_symbol =
1102 (symbol > max_non_zero_bitlen_symbol)
1103 and symbol or max_non_zero_bitlen_symbol
1104 bitlen_counts[bitlen] = (bitlen_counts[bitlen] or 0) + 1
1105 end
1106 end
1107
1108 -- Resolve bit length overflow
1109 -- @see ZLib/trees.c:gen_bitlen(s, desc), for reference
1110 if (number_bitlen_overflow > 0) then
1111 repeat
1112 local bitlen = max_bitlen - 1
1113 while ((bitlen_counts[bitlen] or 0) == 0) do
1114 bitlen = bitlen - 1
1115 end
1116 -- move one leaf down the tree
1117 bitlen_counts[bitlen] = bitlen_counts[bitlen] - 1
1118 -- move one overflow item as its brother
1119 bitlen_counts[bitlen+1] = (bitlen_counts[bitlen+1] or 0) + 2
1120 bitlen_counts[max_bitlen] = bitlen_counts[max_bitlen] - 1
1121 number_bitlen_overflow = number_bitlen_overflow - 2
1122 until (number_bitlen_overflow <= 0)
1123
1124 index = 1
1125 for bitlen = max_bitlen, 1, -1 do
1126 local n = bitlen_counts[bitlen] or 0
1127 while (n > 0) do
1128 local symbol = leafs[index][2]
1129 symbol_bitlens[symbol] = bitlen
1130 n = n - 1
1131 index = index + 1
1132 end
1133 end
1134 end
1135
1136 symbol_codes = GetHuffmanCodeFromBitlen(bitlen_counts, symbol_bitlens,
1137 max_symbol, max_bitlen)
1138 return symbol_bitlens, symbol_codes, max_non_zero_bitlen_symbol
1139 end
1140end
1141
1142-- Calculate the first huffman header in the dynamic huffman block
1143-- @see RFC1951 Page 12
1144-- @param lcode_bitlen: The huffman bit length of literal/LZ77_length.
1145-- @param max_non_zero_bitlen_lcode: The maximum literal/LZ77_length symbol
1146-- whose huffman bit length is not zero.
1147-- @param dcode_bitlen: The huffman bit length of LZ77 distance.
1148-- @param max_non_zero_bitlen_dcode: The maximum LZ77 distance symbol
1149-- whose huffman bit length is not zero.
1150-- @return The run length encoded codes.
1151-- @return The extra bits. One entry for each rle code that needs extra bits.
1152-- (code == 16 or 17 or 18).
1153-- @return The count of appearance of each rle codes.
1154local function RunLengthEncodeHuffmanBitlen(
1155 lcode_bitlens,
1156 max_non_zero_bitlen_lcode,
1157 dcode_bitlens,
1158 max_non_zero_bitlen_dcode)
1159 local rle_code_tblsize = 0
1160 local rle_codes = {}
1161 local rle_code_counts = {}
1162 local rle_extra_bits_tblsize = 0
1163 local rle_extra_bits = {}
1164 local prev = nil
1165 local count = 0
1166
1167 -- If there is no distance code, assume one distance code of bit length 0.
1168 -- RFC1951: One distance code of zero bits means that
1169 -- there are no distance codes used at all (the data is all literals).
1170 max_non_zero_bitlen_dcode = (max_non_zero_bitlen_dcode < 0)
1171 and 0 or max_non_zero_bitlen_dcode
1172 local max_code = max_non_zero_bitlen_lcode+max_non_zero_bitlen_dcode+1
1173
1174 for code = 0, max_code+1 do
1175 local len = (code <= max_non_zero_bitlen_lcode)
1176 and (lcode_bitlens[code] or 0)
1177 or ((code <= max_code)
1178 and (dcode_bitlens[code-max_non_zero_bitlen_lcode-1] or 0) or nil)
1179 if len == prev then
1180 count = count + 1
1181 if len ~= 0 and count == 6 then
1182 rle_code_tblsize = rle_code_tblsize + 1
1183 rle_codes[rle_code_tblsize] = 16
1184 rle_extra_bits_tblsize = rle_extra_bits_tblsize + 1
1185 rle_extra_bits[rle_extra_bits_tblsize] = 3
1186 rle_code_counts[16] = (rle_code_counts[16] or 0) + 1
1187 count = 0
1188 elseif len == 0 and count == 138 then
1189 rle_code_tblsize = rle_code_tblsize + 1
1190 rle_codes[rle_code_tblsize] = 18
1191 rle_extra_bits_tblsize = rle_extra_bits_tblsize + 1
1192 rle_extra_bits[rle_extra_bits_tblsize] = 127
1193 rle_code_counts[18] = (rle_code_counts[18] or 0) + 1
1194 count = 0
1195 end
1196 else
1197 if count == 1 then
1198 rle_code_tblsize = rle_code_tblsize + 1
1199 rle_codes[rle_code_tblsize] = prev
1200 rle_code_counts[prev] = (rle_code_counts[prev] or 0) + 1
1201 elseif count == 2 then
1202 rle_code_tblsize = rle_code_tblsize + 1
1203 rle_codes[rle_code_tblsize] = prev
1204 rle_code_tblsize = rle_code_tblsize + 1
1205 rle_codes[rle_code_tblsize] = prev
1206 rle_code_counts[prev] = (rle_code_counts[prev] or 0) + 2
1207 elseif count >= 3 then
1208 rle_code_tblsize = rle_code_tblsize + 1
1209 local rleCode = (prev ~= 0) and 16 or (count <= 10 and 17 or 18)
1210 rle_codes[rle_code_tblsize] = rleCode
1211 rle_code_counts[rleCode] = (rle_code_counts[rleCode] or 0) + 1
1212 rle_extra_bits_tblsize = rle_extra_bits_tblsize + 1
1213 rle_extra_bits[rle_extra_bits_tblsize] =
1214 (count <= 10) and (count - 3) or (count - 11)
1215 end
1216
1217 prev = len
1218 if len and len ~= 0 then
1219 rle_code_tblsize = rle_code_tblsize + 1
1220 rle_codes[rle_code_tblsize] = len
1221 rle_code_counts[len] = (rle_code_counts[len] or 0) + 1
1222 count = 0
1223 else
1224 count = 1
1225 end
1226 end
1227 end
1228
1229 return rle_codes, rle_extra_bits, rle_code_counts
1230end
1231
1232-- Load the string into a table, in order to speed up LZ77.
1233-- Loop unrolled 16 times to speed this function up.
1234-- @param str The string to be loaded.
1235-- @param t The load destination
1236-- @param start str[index] will be the first character to be loaded.
1237-- @param end str[index] will be the last character to be loaded
1238-- @param offset str[index] will be loaded into t[index-offset]
1239-- @return t
1240local function LoadStringToTable(str, t, start, stop, offset)
1241 local i = start - offset
1242 while i <= stop - 15 - offset do
1243 t[i], t[i+1], t[i+2], t[i+3], t[i+4], t[i+5], t[i+6], t[i+7], t[i+8],
1244 t[i+9], t[i+10], t[i+11], t[i+12], t[i+13], t[i+14], t[i+15] =
1245 string_byte(str, i + offset, i + 15 + offset)
1246 i = i + 16
1247 end
1248 while (i <= stop - offset) do
1249 t[i] = string_byte(str, i + offset, i + offset)
1250 i = i + 1
1251 end
1252 return t
1253end
1254
1255-- Do LZ77 process. This function uses the majority of the CPU time.
1256-- @see zlib/deflate.c:deflate_fast(), zlib/deflate.c:deflate_slow()
1257-- @see https://github.com/madler/zlib/blob/master/doc/algorithm.txt
1258-- This function uses the algorithms used above. You should read the
1259-- algorithm.txt above to understand what is the hash function and the
1260-- lazy evaluation.
1261--
1262-- The special optimization used here is hash functions used here.
1263-- The hash function is just the multiplication of the three consective
1264-- characters. So if the hash matches, it guarantees 3 characters are matched.
1265-- This optimization can be implemented because Lua table is a hash table.
1266--
1267-- @param level integer that describes compression level.
1268-- @param string_table table that stores the value of string to be compressed.
1269-- The index of this table starts from 1.
1270-- The caller needs to make sure all values needed by this function
1271-- are loaded.
1272-- Assume "str" is the origin input string into the compressor
1273-- str[block_start]..str[block_end+3] needs to be loaded into
1274-- string_table[block_start-offset]..string_table[block_end-offset]
1275-- If dictionary is presented, the last 258 bytes of the dictionary
1276-- needs to be loaded into sing_table[-257..0]
1277-- (See more in the description of offset.)
1278-- @param hash_tables. The table key is the hash value (0<=hash<=16777216=256^3)
1279-- The table value is an array0 that stores the indexes of the
1280-- input data string to be compressed, such that
1281-- hash == str[index]*str[index+1]*str[index+2]
1282-- Indexes are ordered in this array.
1283-- @param block_start The indexes of the input data string to be compressed.
1284-- that starts the LZ77 block.
1285-- @param block_end The indexes of the input data string to be compressed.
1286-- that stores the LZ77 block.
1287-- @param offset str[index] is stored in string_table[index-offset],
1288-- This offset is mainly an optimization to limit the index
1289-- of string_table, so lua can access this table quicker.
1290-- @param dictionary See LibDeflate:CreateDictionary
1291-- @return literal/LZ77_length deflate codes.
1292-- @return the extra bits of literal/LZ77_length deflate codes.
1293-- @return the count of each literal/LZ77 deflate code.
1294-- @return LZ77 distance deflate codes.
1295-- @return the extra bits of LZ77 distance deflate codes.
1296-- @return the count of each LZ77 distance deflate code.
1297local function GetBlockLZ77Result(level, string_table, hash_tables, block_start,
1298 block_end, offset, dictionary)
1299 local config = _compression_level_configs[level]
1300 local config_use_lazy
1301 , config_good_prev_length
1302 , config_max_lazy_match
1303 , config_nice_length
1304 , config_max_hash_chain =
1305 config[1], config[2], config[3], config[4], config[5]
1306
1307 local config_max_insert_length = (not config_use_lazy)
1308 and config_max_lazy_match or 2147483646
1309 local config_good_hash_chain =
1310 (config_max_hash_chain-config_max_hash_chain%4/4)
1311
1312 local hash
1313
1314 local dict_hash_tables
1315 local dict_string_table
1316 local dict_string_len = 0
1317
1318 if dictionary then
1319 dict_hash_tables = dictionary.hash_tables
1320 dict_string_table = dictionary.string_table
1321 dict_string_len = dictionary.strlen
1322 assert(block_start == 1)
1323 if block_end >= block_start and dict_string_len >= 2 then
1324 hash = dict_string_table[dict_string_len-1]*65536
1325 + dict_string_table[dict_string_len]*256 + string_table[1]
1326 local t = hash_tables[hash]
1327 if not t then t = {}; hash_tables[hash] = t end
1328 t[#t+1] = -1
1329 end
1330 if block_end >= block_start+1 and dict_string_len >= 1 then
1331 hash = dict_string_table[dict_string_len]*65536
1332 + string_table[1]*256 + string_table[2]
1333 local t = hash_tables[hash]
1334 if not t then t = {}; hash_tables[hash] = t end
1335 t[#t+1] = 0
1336 end
1337 end
1338
1339 hash = (string_table[block_start-offset] or 0)*256
1340 + (string_table[block_start+1-offset] or 0)
1341
1342 local lcodes = {}
1343 local lcode_tblsize = 0
1344 local lcodes_counts = {}
1345 local dcodes = {}
1346 local dcodes_tblsize = 0
1347 local dcodes_counts = {}
1348
1349 local lextra_bits = {}
1350 local lextra_bits_tblsize = 0
1351 local dextra_bits = {}
1352 local dextra_bits_tblsize = 0
1353
1354 local match_available = false
1355 local prev_len
1356 local prev_dist
1357 local cur_len = 0
1358 local cur_dist = 0
1359
1360 local index = block_start
1361 local index_end = block_end + (config_use_lazy and 1 or 0)
1362
1363 -- the zlib source code writes separate code for lazy evaluation and
1364 -- not lazy evaluation, which is easier to understand.
1365 -- I put them together, so it is a bit harder to understand.
1366 -- because I think this is easier for me to maintain it.
1367 while (index <= index_end) do
1368 local string_table_index = index - offset
1369 prev_len = cur_len
1370 prev_dist = cur_dist
1371 cur_len = 0
1372
1373 hash = (hash*256+(string_table[string_table_index+2] or 0))%16777216
1374
1375 local chain_index
1376 local cur_chain
1377 local hash_chain = hash_tables[hash]
1378 local chain_old_size
1379 if not hash_chain then
1380 chain_old_size = 0
1381 hash_chain = {}
1382 hash_tables[hash] = hash_chain
1383 if dict_hash_tables then
1384 cur_chain = dict_hash_tables[hash]
1385 chain_index = cur_chain and #cur_chain or 0
1386 else
1387 chain_index = 0
1388 end
1389 else
1390 chain_old_size = #hash_chain
1391 cur_chain = hash_chain
1392 chain_index = chain_old_size
1393 end
1394
1395 if index <= block_end then
1396 hash_chain[chain_old_size+1] = index
1397 end
1398
1399 if (chain_index > 0 and index + 2 <= block_end
1400 and (not config_use_lazy or prev_len < config_max_lazy_match)) then
1401
1402 local depth =
1403 (config_use_lazy and prev_len >= config_good_prev_length)
1404 and config_good_hash_chain or config_max_hash_chain
1405
1406 while chain_index >= 1 and depth > 0 do
1407 local prev = cur_chain[chain_index]
1408
1409 if index - prev > 32768 then
1410 break
1411 end
1412 if prev < index then
1413 local j = 3
1414
1415 if prev >= -257 then
1416 local prev_table_index = prev-offset
1417 -- NOTE for author:
1418 -- j < 258 and index + j <= block_end
1419 -- This is the right condition
1420 while (j < 258 and index + j <= block_end) do
1421 if (string_table[prev_table_index+j]
1422 == string_table[string_table_index+j]) then
1423 j = j + 1
1424 else
1425 break
1426 end
1427 end
1428 else
1429 local prev_table_index = dict_string_len+prev
1430 -- NOTE for author:
1431 -- j < 258 and index + j <= block_end
1432 -- This is the right condition
1433 while (j < 258 and index + j <= block_end) do
1434 if (dict_string_table[prev_table_index+j]
1435 == string_table[string_table_index+j]) then
1436 j = j + 1
1437 else
1438 break
1439 end
1440 end
1441 end
1442 if j > cur_len then
1443 cur_len = j
1444 cur_dist = index - prev
1445 end
1446 if cur_len >= config_nice_length then
1447 break
1448 end
1449 end
1450
1451 chain_index = chain_index - 1
1452 depth = depth - 1
1453 if chain_index == 0 and prev > 0 and dict_hash_tables then
1454 cur_chain = dict_hash_tables[hash]
1455 chain_index = cur_chain and #cur_chain or 0
1456 end
1457 end
1458 end
1459
1460 if not config_use_lazy then
1461 prev_len, prev_dist = cur_len, cur_dist
1462 end
1463 if ((not config_use_lazy or match_available)
1464 and (prev_len > 3 or (prev_len == 3 and prev_dist < 4096))
1465 and cur_len <= prev_len )then
1466 local code = _length_to_deflate_code[prev_len]
1467 local length_extra_bits_bitlen =
1468 _length_to_deflate_extra_bitlen[prev_len]
1469 local dist_code, dist_extra_bits_bitlen, dist_extra_bits
1470 if prev_dist <= 256 then -- have cached code for small distance.
1471 dist_code = _dist256_to_deflate_code[prev_dist]
1472 dist_extra_bits = _dist256_to_deflate_extra_bits[prev_dist]
1473 dist_extra_bits_bitlen =
1474 _dist256_to_deflate_extra_bitlen[prev_dist]
1475 else
1476 dist_code = 16
1477 dist_extra_bits_bitlen = 7
1478 local a = 384
1479 local b = 512
1480
1481 while true do
1482 if prev_dist <= a then
1483 dist_extra_bits = (prev_dist-(b/2)-1) % (b/4)
1484 break
1485 elseif prev_dist <= b then
1486 dist_extra_bits = (prev_dist-(b/2)-1) % (b/4)
1487 dist_code = dist_code + 1
1488 break
1489 else
1490 dist_code = dist_code + 2
1491 dist_extra_bits_bitlen = dist_extra_bits_bitlen + 1
1492 a = a*2
1493 b = b*2
1494 end
1495 end
1496 end
1497 lcode_tblsize = lcode_tblsize + 1
1498 lcodes[lcode_tblsize] = code
1499 lcodes_counts[code] = (lcodes_counts[code] or 0) + 1
1500
1501 dcodes_tblsize = dcodes_tblsize + 1
1502 dcodes[dcodes_tblsize] = dist_code
1503 dcodes_counts[dist_code] = (dcodes_counts[dist_code] or 0) + 1
1504
1505 if length_extra_bits_bitlen > 0 then
1506 local lenExtraBits = _length_to_deflate_extra_bits[prev_len]
1507 lextra_bits_tblsize = lextra_bits_tblsize + 1
1508 lextra_bits[lextra_bits_tblsize] = lenExtraBits
1509 end
1510 if dist_extra_bits_bitlen > 0 then
1511 dextra_bits_tblsize = dextra_bits_tblsize + 1
1512 dextra_bits[dextra_bits_tblsize] = dist_extra_bits
1513 end
1514
1515 for i=index+1, index+prev_len-(config_use_lazy and 2 or 1) do
1516 hash = (hash*256+(string_table[i-offset+2] or 0))%16777216
1517 if prev_len <= config_max_insert_length then
1518 hash_chain = hash_tables[hash]
1519 if not hash_chain then
1520 hash_chain = {}
1521 hash_tables[hash] = hash_chain
1522 end
1523 hash_chain[#hash_chain+1] = i
1524 end
1525 end
1526 index = index + prev_len - (config_use_lazy and 1 or 0)
1527 match_available = false
1528 elseif (not config_use_lazy) or match_available then
1529 local code = string_table[config_use_lazy
1530 and (string_table_index-1) or string_table_index]
1531 lcode_tblsize = lcode_tblsize + 1
1532 lcodes[lcode_tblsize] = code
1533 lcodes_counts[code] = (lcodes_counts[code] or 0) + 1
1534 index = index + 1
1535 else
1536 match_available = true
1537 index = index + 1
1538 end
1539 end
1540
1541 -- Write "end of block" symbol
1542 lcode_tblsize = lcode_tblsize + 1
1543 lcodes[lcode_tblsize] = 256
1544 lcodes_counts[256] = (lcodes_counts[256] or 0) + 1
1545
1546 return lcodes, lextra_bits, lcodes_counts, dcodes, dextra_bits
1547 , dcodes_counts
1548end
1549
1550-- Get the header data of dynamic block.
1551-- @param lcodes_count The count of each literal/LZ77_length codes.
1552-- @param dcodes_count The count of each Lz77 distance codes.
1553-- @return a lots of stuffs.
1554-- @see RFC1951 Page 12
1555local function GetBlockDynamicHuffmanHeader(lcodes_counts, dcodes_counts)
1556 local lcodes_huffman_bitlens, lcodes_huffman_codes
1557 , max_non_zero_bitlen_lcode =
1558 GetHuffmanBitlenAndCode(lcodes_counts, 15, 285)
1559 local dcodes_huffman_bitlens, dcodes_huffman_codes
1560 , max_non_zero_bitlen_dcode =
1561 GetHuffmanBitlenAndCode(dcodes_counts, 15, 29)
1562
1563 local rle_deflate_codes, rle_extra_bits, rle_codes_counts =
1564 RunLengthEncodeHuffmanBitlen(lcodes_huffman_bitlens
1565 ,max_non_zero_bitlen_lcode, dcodes_huffman_bitlens
1566 , max_non_zero_bitlen_dcode)
1567
1568 local rle_codes_huffman_bitlens, rle_codes_huffman_codes =
1569 GetHuffmanBitlenAndCode(rle_codes_counts, 7, 18)
1570
1571 local HCLEN = 0
1572 for i = 1, 19 do
1573 local symbol = _rle_codes_huffman_bitlen_order[i]
1574 local length = rle_codes_huffman_bitlens[symbol] or 0
1575 if length ~= 0 then
1576 HCLEN = i
1577 end
1578 end
1579
1580 HCLEN = HCLEN - 4
1581 local HLIT = max_non_zero_bitlen_lcode + 1 - 257
1582 local HDIST = max_non_zero_bitlen_dcode + 1 - 1
1583 if HDIST < 0 then HDIST = 0 end
1584
1585 return HLIT, HDIST, HCLEN, rle_codes_huffman_bitlens
1586 , rle_codes_huffman_codes, rle_deflate_codes, rle_extra_bits
1587 , lcodes_huffman_bitlens, lcodes_huffman_codes
1588 , dcodes_huffman_bitlens, dcodes_huffman_codes
1589end
1590
1591-- Get the size of dynamic block without writing any bits into the writer.
1592-- @param ... Read the source code of GetBlockDynamicHuffmanHeader()
1593-- @return the bit length of the dynamic block
1594local function GetDynamicHuffmanBlockSize(lcodes, dcodes, HCLEN
1595 , rle_codes_huffman_bitlens, rle_deflate_codes
1596 , lcodes_huffman_bitlens, dcodes_huffman_bitlens)
1597
1598 local block_bitlen = 17 -- 1+2+5+5+4
1599 block_bitlen = block_bitlen + (HCLEN+4)*3
1600
1601 for i = 1, #rle_deflate_codes do
1602 local code = rle_deflate_codes[i]
1603 block_bitlen = block_bitlen + rle_codes_huffman_bitlens[code]
1604 if code >= 16 then
1605 block_bitlen = block_bitlen +
1606 ((code == 16) and 2 or (code == 17 and 3 or 7))
1607 end
1608 end
1609
1610 local length_code_count = 0
1611 for i = 1, #lcodes do
1612 local code = lcodes[i]
1613 local huffman_bitlen = lcodes_huffman_bitlens[code]
1614 block_bitlen = block_bitlen + huffman_bitlen
1615 if code > 256 then -- Length code
1616 length_code_count = length_code_count + 1
1617 if code > 264 and code < 285 then -- Length code with extra bits
1618 local extra_bits_bitlen =
1619 _literal_deflate_code_to_extra_bitlen[code-256]
1620 block_bitlen = block_bitlen + extra_bits_bitlen
1621 end
1622 local dist_code = dcodes[length_code_count]
1623 local dist_huffman_bitlen = dcodes_huffman_bitlens[dist_code]
1624 block_bitlen = block_bitlen + dist_huffman_bitlen
1625
1626 if dist_code > 3 then -- dist code with extra bits
1627 local dist_extra_bits_bitlen = (dist_code-dist_code%2)/2 - 1
1628 block_bitlen = block_bitlen + dist_extra_bits_bitlen
1629 end
1630 end
1631 end
1632 return block_bitlen
1633end
1634
1635-- Write dynamic block.
1636-- @param ... Read the source code of GetBlockDynamicHuffmanHeader()
1637local function CompressDynamicHuffmanBlock(WriteBits, is_last_block
1638 , lcodes, lextra_bits, dcodes, dextra_bits, HLIT, HDIST, HCLEN
1639 , rle_codes_huffman_bitlens, rle_codes_huffman_codes
1640 , rle_deflate_codes, rle_extra_bits
1641 , lcodes_huffman_bitlens, lcodes_huffman_codes
1642 , dcodes_huffman_bitlens, dcodes_huffman_codes)
1643
1644 WriteBits(is_last_block and 1 or 0, 1) -- Last block identifier
1645 WriteBits(2, 2) -- Dynamic Huffman block identifier
1646
1647 WriteBits(HLIT, 5)
1648 WriteBits(HDIST, 5)
1649 WriteBits(HCLEN, 4)
1650
1651 for i = 1, HCLEN+4 do
1652 local symbol = _rle_codes_huffman_bitlen_order[i]
1653 local length = rle_codes_huffman_bitlens[symbol] or 0
1654 WriteBits(length, 3)
1655 end
1656
1657 local rleExtraBitsIndex = 1
1658 for i=1, #rle_deflate_codes do
1659 local code = rle_deflate_codes[i]
1660 WriteBits(rle_codes_huffman_codes[code]
1661 , rle_codes_huffman_bitlens[code])
1662 if code >= 16 then
1663 local extraBits = rle_extra_bits[rleExtraBitsIndex]
1664 WriteBits(extraBits, (code == 16) and 2 or (code == 17 and 3 or 7))
1665 rleExtraBitsIndex = rleExtraBitsIndex + 1
1666 end
1667 end
1668
1669 local length_code_count = 0
1670 local length_code_with_extra_count = 0
1671 local dist_code_with_extra_count = 0
1672
1673 for i=1, #lcodes do
1674 local deflate_codee = lcodes[i]
1675 local huffman_code = lcodes_huffman_codes[deflate_codee]
1676 local huffman_bitlen = lcodes_huffman_bitlens[deflate_codee]
1677 WriteBits(huffman_code, huffman_bitlen)
1678 if deflate_codee > 256 then -- Length code
1679 length_code_count = length_code_count + 1
1680 if deflate_codee > 264 and deflate_codee < 285 then
1681 -- Length code with extra bits
1682 length_code_with_extra_count = length_code_with_extra_count + 1
1683 local extra_bits = lextra_bits[length_code_with_extra_count]
1684 local extra_bits_bitlen =
1685 _literal_deflate_code_to_extra_bitlen[deflate_codee-256]
1686 WriteBits(extra_bits, extra_bits_bitlen)
1687 end
1688 -- Write distance code
1689 local dist_deflate_code = dcodes[length_code_count]
1690 local dist_huffman_code = dcodes_huffman_codes[dist_deflate_code]
1691 local dist_huffman_bitlen =
1692 dcodes_huffman_bitlens[dist_deflate_code]
1693 WriteBits(dist_huffman_code, dist_huffman_bitlen)
1694
1695 if dist_deflate_code > 3 then -- dist code with extra bits
1696 dist_code_with_extra_count = dist_code_with_extra_count + 1
1697 local dist_extra_bits = dextra_bits[dist_code_with_extra_count]
1698 local dist_extra_bits_bitlen =
1699 (dist_deflate_code-dist_deflate_code%2)/2 - 1
1700 WriteBits(dist_extra_bits, dist_extra_bits_bitlen)
1701 end
1702 end
1703 end
1704end
1705
1706-- Get the size of fixed block without writing any bits into the writer.
1707-- @param lcodes literal/LZ77_length deflate codes
1708-- @param decodes LZ77 distance deflate codes
1709-- @return the bit length of the fixed block
1710local function GetFixedHuffmanBlockSize(lcodes, dcodes)
1711 local block_bitlen = 3
1712 local length_code_count = 0
1713 for i=1, #lcodes do
1714 local code = lcodes[i]
1715 local huffman_bitlen = _fix_block_literal_huffman_bitlen[code]
1716 block_bitlen = block_bitlen + huffman_bitlen
1717 if code > 256 then -- Length code
1718 length_code_count = length_code_count + 1
1719 if code > 264 and code < 285 then -- Length code with extra bits
1720 local extra_bits_bitlen =
1721 _literal_deflate_code_to_extra_bitlen[code-256]
1722 block_bitlen = block_bitlen + extra_bits_bitlen
1723 end
1724 local dist_code = dcodes[length_code_count]
1725 block_bitlen = block_bitlen + 5
1726
1727 if dist_code > 3 then -- dist code with extra bits
1728 local dist_extra_bits_bitlen =
1729 (dist_code-dist_code%2)/2 - 1
1730 block_bitlen = block_bitlen + dist_extra_bits_bitlen
1731 end
1732 end
1733 end
1734 return block_bitlen
1735end
1736
1737-- Write fixed block.
1738-- @param lcodes literal/LZ77_length deflate codes
1739-- @param decodes LZ77 distance deflate codes
1740local function CompressFixedHuffmanBlock(WriteBits, is_last_block,
1741 lcodes, lextra_bits, dcodes, dextra_bits)
1742 WriteBits(is_last_block and 1 or 0, 1) -- Last block identifier
1743 WriteBits(1, 2) -- Fixed Huffman block identifier
1744 local length_code_count = 0
1745 local length_code_with_extra_count = 0
1746 local dist_code_with_extra_count = 0
1747 for i=1, #lcodes do
1748 local deflate_code = lcodes[i]
1749 local huffman_code = _fix_block_literal_huffman_code[deflate_code]
1750 local huffman_bitlen = _fix_block_literal_huffman_bitlen[deflate_code]
1751 WriteBits(huffman_code, huffman_bitlen)
1752 if deflate_code > 256 then -- Length code
1753 length_code_count = length_code_count + 1
1754 if deflate_code > 264 and deflate_code < 285 then
1755 -- Length code with extra bits
1756 length_code_with_extra_count = length_code_with_extra_count + 1
1757 local extra_bits = lextra_bits[length_code_with_extra_count]
1758 local extra_bits_bitlen =
1759 _literal_deflate_code_to_extra_bitlen[deflate_code-256]
1760 WriteBits(extra_bits, extra_bits_bitlen)
1761 end
1762 -- Write distance code
1763 local dist_code = dcodes[length_code_count]
1764 local dist_huffman_code = _fix_block_dist_huffman_code[dist_code]
1765 WriteBits(dist_huffman_code, 5)
1766
1767 if dist_code > 3 then -- dist code with extra bits
1768 dist_code_with_extra_count = dist_code_with_extra_count + 1
1769 local dist_extra_bits = dextra_bits[dist_code_with_extra_count]
1770 local dist_extra_bits_bitlen = (dist_code-dist_code%2)/2 - 1
1771 WriteBits(dist_extra_bits, dist_extra_bits_bitlen)
1772 end
1773 end
1774 end
1775end
1776
1777-- Get the size of store block without writing any bits into the writer.
1778-- @param block_start The start index of the origin input string
1779-- @param block_end The end index of the origin input string
1780-- @param Total bit lens had been written into the compressed result before,
1781-- because store block needs to shift to byte boundary.
1782-- @return the bit length of the fixed block
1783local function GetStoreBlockSize(block_start, block_end, total_bitlen)
1784 assert(block_end-block_start+1 <= 65535)
1785 local block_bitlen = 3
1786 total_bitlen = total_bitlen + 3
1787 local padding_bitlen = (8-total_bitlen%8)%8
1788 block_bitlen = block_bitlen + padding_bitlen
1789 block_bitlen = block_bitlen + 32
1790 block_bitlen = block_bitlen + (block_end - block_start + 1) * 8
1791 return block_bitlen
1792end
1793
1794-- Write the store block.
1795-- @param ... lots of stuffs
1796-- @return nil
1797local function CompressStoreBlock(WriteBits, WriteString, is_last_block, str
1798 , block_start, block_end, total_bitlen)
1799 assert(block_end-block_start+1 <= 65535)
1800 WriteBits(is_last_block and 1 or 0, 1) -- Last block identifer.
1801 WriteBits(0, 2) -- Store block identifier.
1802 total_bitlen = total_bitlen + 3
1803 local padding_bitlen = (8-total_bitlen%8)%8
1804 if padding_bitlen > 0 then
1805 WriteBits(_pow2[padding_bitlen]-1, padding_bitlen)
1806 end
1807 local size = block_end - block_start + 1
1808 WriteBits(size, 16)
1809
1810 -- Write size's one's complement
1811 local comp = (255 - size % 256) + (255 - (size-size%256)/256)*256
1812 WriteBits(comp, 16)
1813
1814 WriteString(str:sub(block_start, block_end))
1815end
1816
1817-- Do the deflate
1818-- Currently using a simple way to determine the block size
1819-- (This is why the compression ratio is little bit worse than zlib when
1820-- the input size is very large
1821-- The first block is 64KB, the following block is 32KB.
1822-- After each block, there is a memory cleanup operation.
1823-- This is not a fast operation, but it is needed to save memory usage, so
1824-- the memory usage does not grow unboundly. If the data size is less than
1825-- 64KB, then memory cleanup won't happen.
1826-- This function determines whether to use store/fixed/dynamic blocks by
1827-- calculating the block size of each block type and chooses the smallest one.
1828local function Deflate(configs, WriteBits, WriteString, FlushWriter, str
1829 , dictionary)
1830 local string_table = {}
1831 local hash_tables = {}
1832 local is_last_block = nil
1833 local block_start
1834 local block_end
1835 local bitlen_written
1836 local total_bitlen = FlushWriter(_FLUSH_MODE_NO_FLUSH)
1837 local strlen = #str
1838 local offset
1839
1840 local level
1841 local strategy
1842 if configs then
1843 if configs.level then
1844 level = configs.level
1845 end
1846 if configs.strategy then
1847 strategy = configs.strategy
1848 end
1849 end
1850
1851 if not level then
1852 if strlen < 2048 then
1853 level = 7
1854 elseif strlen > 65536 then
1855 level = 3
1856 else
1857 level = 5
1858 end
1859 end
1860
1861 while not is_last_block do
1862 if not block_start then
1863 block_start = 1
1864 block_end = 64*1024 - 1
1865 offset = 0
1866 else
1867 block_start = block_end + 1
1868 block_end = block_end + 32*1024
1869 offset = block_start - 32*1024 - 1
1870 end
1871
1872 if block_end >= strlen then
1873 block_end = strlen
1874 is_last_block = true
1875 else
1876 is_last_block = false
1877 end
1878
1879 local lcodes, lextra_bits, lcodes_counts, dcodes, dextra_bits
1880 , dcodes_counts
1881
1882 local HLIT, HDIST, HCLEN, rle_codes_huffman_bitlens
1883 , rle_codes_huffman_codes, rle_deflate_codes
1884 , rle_extra_bits, lcodes_huffman_bitlens, lcodes_huffman_codes
1885 , dcodes_huffman_bitlens, dcodes_huffman_codes
1886
1887 local dynamic_block_bitlen
1888 local fixed_block_bitlen
1889 local store_block_bitlen
1890
1891 if level ~= 0 then
1892
1893 -- GetBlockLZ77 needs block_start to block_end+3 to be loaded.
1894 LoadStringToTable(str, string_table, block_start, block_end + 3
1895 , offset)
1896 if block_start == 1 and dictionary then
1897 local dict_string_table = dictionary.string_table
1898 local dict_strlen = dictionary.strlen
1899 for i=0, (-dict_strlen+1)<-257
1900 and -257 or (-dict_strlen+1), -1 do
1901 string_table[i] = dict_string_table[dict_strlen+i]
1902 end
1903 end
1904
1905 if strategy == "huffman_only" then
1906 lcodes = {}
1907 LoadStringToTable(str, lcodes, block_start, block_end
1908 , block_start-1)
1909 lextra_bits = {}
1910 lcodes_counts = {}
1911 lcodes[block_end - block_start+2] = 256 -- end of block
1912 for i=1, block_end - block_start+2 do
1913 local code = lcodes[i]
1914 lcodes_counts[code] = (lcodes_counts[code] or 0) + 1
1915 end
1916 dcodes = {}
1917 dextra_bits = {}
1918 dcodes_counts = {}
1919 else
1920 lcodes, lextra_bits, lcodes_counts, dcodes, dextra_bits
1921 , dcodes_counts = GetBlockLZ77Result(level, string_table
1922 , hash_tables, block_start, block_end, offset, dictionary
1923 )
1924 end
1925
1926 HLIT, HDIST, HCLEN, rle_codes_huffman_bitlens
1927 , rle_codes_huffman_codes, rle_deflate_codes
1928 , rle_extra_bits, lcodes_huffman_bitlens, lcodes_huffman_codes
1929 , dcodes_huffman_bitlens, dcodes_huffman_codes =
1930 GetBlockDynamicHuffmanHeader(lcodes_counts, dcodes_counts)
1931 dynamic_block_bitlen = GetDynamicHuffmanBlockSize(
1932 lcodes, dcodes, HCLEN, rle_codes_huffman_bitlens
1933 , rle_deflate_codes, lcodes_huffman_bitlens
1934 , dcodes_huffman_bitlens)
1935 fixed_block_bitlen = GetFixedHuffmanBlockSize(lcodes, dcodes)
1936 end
1937
1938 store_block_bitlen = GetStoreBlockSize(block_start, block_end
1939 , total_bitlen)
1940
1941 local min_bitlen = store_block_bitlen
1942 min_bitlen = (fixed_block_bitlen and fixed_block_bitlen < min_bitlen)
1943 and fixed_block_bitlen or min_bitlen
1944 min_bitlen = (dynamic_block_bitlen
1945 and dynamic_block_bitlen < min_bitlen)
1946 and dynamic_block_bitlen or min_bitlen
1947
1948 if level == 0 or (strategy ~= "fixed" and strategy ~= "dynamic" and
1949 store_block_bitlen == min_bitlen) then
1950 CompressStoreBlock(WriteBits, WriteString, is_last_block
1951 , str, block_start, block_end, total_bitlen)
1952 total_bitlen = total_bitlen + store_block_bitlen
1953 elseif strategy ~= "dynamic" and (
1954 strategy == "fixed" or fixed_block_bitlen == min_bitlen) then
1955 CompressFixedHuffmanBlock(WriteBits, is_last_block,
1956 lcodes, lextra_bits, dcodes, dextra_bits)
1957 total_bitlen = total_bitlen + fixed_block_bitlen
1958 elseif strategy == "dynamic" or dynamic_block_bitlen == min_bitlen then
1959 CompressDynamicHuffmanBlock(WriteBits, is_last_block, lcodes
1960 , lextra_bits, dcodes, dextra_bits, HLIT, HDIST, HCLEN
1961 , rle_codes_huffman_bitlens, rle_codes_huffman_codes
1962 , rle_deflate_codes, rle_extra_bits
1963 , lcodes_huffman_bitlens, lcodes_huffman_codes
1964 , dcodes_huffman_bitlens, dcodes_huffman_codes)
1965 total_bitlen = total_bitlen + dynamic_block_bitlen
1966 end
1967
1968 if is_last_block then
1969 bitlen_written = FlushWriter(_FLUSH_MODE_NO_FLUSH)
1970 else
1971 bitlen_written = FlushWriter(_FLUSH_MODE_MEMORY_CLEANUP)
1972 end
1973
1974 assert(bitlen_written == total_bitlen)
1975
1976 -- Memory clean up, so memory consumption does not always grow linearly
1977 -- , even if input string is > 64K.
1978 -- Not a very efficient operation, but this operation won't happen
1979 -- when the input data size is less than 64K.
1980 if not is_last_block then
1981 local j
1982 if dictionary and block_start == 1 then
1983 j = 0
1984 while (string_table[j]) do
1985 string_table[j] = nil
1986 j = j - 1
1987 end
1988 end
1989 dictionary = nil
1990 j = 1
1991 for i = block_end-32767, block_end do
1992 string_table[j] = string_table[i-offset]
1993 j = j + 1
1994 end
1995 for k, t in pairs(hash_tables) do
1996 local tSize = #t
1997 if tSize > 0 and block_end+1 - t[1] > 32768 then
1998 if tSize == 1 then
1999 --hash_tables[k] = nil -- seems to be causing problems
2000 else
2001 local new = {}
2002 local newSize = 0
2003 for i = 2, tSize do
2004 j = t[i]
2005 if block_end+1 - j <= 32768 then
2006 newSize = newSize + 1
2007 new[newSize] = j
2008 end
2009 end
2010 hash_tables[k] = new
2011 end
2012 end
2013 end
2014 end
2015
2016 if os and os.pullEvent then -- ComputerCraft requires this for long-running processes
2017 os.queueEvent("nosleep")
2018 os.pullEvent()
2019 end
2020 end
2021end
2022
2023--- The description to compression configuration table. <br>
2024-- Any field can be nil to use its default. <br>
2025-- Table with keys other than those below is an invalid table.
2026-- @class table
2027-- @name compression_configs
2028-- @field level The compression level ranged from 0 to 9. 0 is no compression.
2029-- 9 is the slowest but best compression. Use nil for default level.
2030-- @field strategy The compression strategy. "fixed" to only use fixed deflate
2031-- compression block. "dynamic" to only use dynamic block. "huffman_only" to
2032-- do no LZ77 compression. Only do huffman compression.
2033
2034
2035-- @see LibDeflate:CompressDeflate(str, configs)
2036-- @see LibDeflate:CompressDeflateWithDict(str, dictionary, configs)
2037local function CompressDeflateInternal(str, dictionary, configs)
2038 local WriteBits, WriteString, FlushWriter = CreateWriter()
2039 Deflate(configs, WriteBits, WriteString, FlushWriter, str, dictionary)
2040 local total_bitlen, result = FlushWriter(_FLUSH_MODE_OUTPUT)
2041 local padding_bitlen = (8-total_bitlen%8)%8
2042 return result, padding_bitlen
2043end
2044
2045-- @see LibDeflate:CompressZlib
2046-- @see LibDeflate:CompressZlibWithDict
2047local function CompressZlibInternal(str, dictionary, configs)
2048 local WriteBits, WriteString, FlushWriter = CreateWriter()
2049
2050 local CM = 8 -- Compression method
2051 local CINFO = 7 --Window Size = 32K
2052 local CMF = CINFO*16+CM
2053 WriteBits(CMF, 8)
2054
2055 local FDIST = dictionary and 1 or 0
2056 local FLEVEL = 2 -- Default compression
2057 local FLG = FLEVEL*64+FDIST*32
2058 local FCHECK = (31-(CMF*256+FLG)%31)
2059 -- The FCHECK value must be such that CMF and FLG,
2060 -- when viewed as a 16-bit unsigned integer stored
2061 -- in MSB order (CMF*256 + FLG), is a multiple of 31.
2062 FLG = FLG + FCHECK
2063 WriteBits(FLG, 8)
2064
2065 if FDIST == 1 then
2066 local adler32 = dictionary.adler32
2067 local byte0 = adler32 % 256
2068 adler32 = (adler32 - byte0) / 256
2069 local byte1 = adler32 % 256
2070 adler32 = (adler32 - byte1) / 256
2071 local byte2 = adler32 % 256
2072 adler32 = (adler32 - byte2) / 256
2073 local byte3 = adler32 % 256
2074 WriteBits(byte3, 8)
2075 WriteBits(byte2, 8)
2076 WriteBits(byte1, 8)
2077 WriteBits(byte0, 8)
2078 end
2079
2080 Deflate(configs, WriteBits, WriteString, FlushWriter, str, dictionary)
2081 FlushWriter(_FLUSH_MODE_BYTE_BOUNDARY)
2082
2083 local adler32 = LibDeflate:Adler32(str)
2084
2085 -- Most significant byte first
2086 local byte3 = adler32%256
2087 adler32 = (adler32 - byte3) / 256
2088 local byte2 = adler32%256
2089 adler32 = (adler32 - byte2) / 256
2090 local byte1 = adler32%256
2091 adler32 = (adler32 - byte1) / 256
2092 local byte0 = adler32%256
2093
2094 WriteBits(byte0, 8)
2095 WriteBits(byte1, 8)
2096 WriteBits(byte2, 8)
2097 WriteBits(byte3, 8)
2098 local total_bitlen, result = FlushWriter(_FLUSH_MODE_OUTPUT)
2099 local padding_bitlen = (8-total_bitlen%8)%8
2100 return result, padding_bitlen
2101end
2102
2103--- Compress using the raw deflate format.
2104-- @param str [string] The data to be compressed.
2105-- @param configs [table/nil] The configuration table to control the compression
2106-- . If nil, use the default configuration.
2107-- @return [string] The compressed data.
2108-- @return [integer] The number of bits padded at the end of output.
2109-- 0 <= bits < 8 <br>
2110-- This means the most significant "bits" of the last byte of the returned
2111-- compressed data are padding bits and they don't affect decompression.
2112-- You don't need to use this value unless you want to do some postprocessing
2113-- to the compressed data.
2114-- @see compression_configs
2115-- @see LibDeflate:DecompressDeflate
2116function LibDeflate:CompressDeflate(str, configs)
2117 local arg_valid, arg_err = IsValidArguments(str, false, nil, true, configs)
2118 if not arg_valid then
2119 error(("Usage: LibDeflate:CompressDeflate(str, configs): "
2120 ..arg_err), 2)
2121 end
2122 return CompressDeflateInternal(str, nil, configs)
2123end
2124
2125--- Compress using the raw deflate format with a preset dictionary.
2126-- @param str [string] The data to be compressed.
2127-- @param dictionary [table] The preset dictionary produced by
2128-- LibDeflate:CreateDictionary
2129-- @param configs [table/nil] The configuration table to control the compression
2130-- . If nil, use the default configuration.
2131-- @return [string] The compressed data.
2132-- @return [integer] The number of bits padded at the end of output.
2133-- 0 <= bits < 8 <br>
2134-- This means the most significant "bits" of the last byte of the returned
2135-- compressed data are padding bits and they don't affect decompression.
2136-- You don't need to use this value unless you want to do some postprocessing
2137-- to the compressed data.
2138-- @see compression_configs
2139-- @see LibDeflate:CreateDictionary
2140-- @see LibDeflate:DecompressDeflateWithDict
2141function LibDeflate:CompressDeflateWithDict(str, dictionary, configs)
2142 local arg_valid, arg_err = IsValidArguments(str, true, dictionary
2143 , true, configs)
2144 if not arg_valid then
2145 error(("Usage: LibDeflate:CompressDeflateWithDict"
2146 .."(str, dictionary, configs): "
2147 ..arg_err), 2)
2148 end
2149 return CompressDeflateInternal(str, dictionary, configs)
2150end
2151
2152--- Compress using the zlib format.
2153-- @param str [string] the data to be compressed.
2154-- @param configs [table/nil] The configuration table to control the compression
2155-- . If nil, use the default configuration.
2156-- @return [string] The compressed data.
2157-- @return [integer] The number of bits padded at the end of output.
2158-- Should always be 0.
2159-- Zlib formatted compressed data never has padding bits at the end.
2160-- @see compression_configs
2161-- @see LibDeflate:DecompressZlib
2162function LibDeflate:CompressZlib(str, configs)
2163 local arg_valid, arg_err = IsValidArguments(str, false, nil, true, configs)
2164 if not arg_valid then
2165 error(("Usage: LibDeflate:CompressZlib(str, configs): "
2166 ..arg_err), 2)
2167 end
2168 return CompressZlibInternal(str, nil, configs)
2169end
2170
2171--- Compress using the zlib format with a preset dictionary.
2172-- @param str [string] the data to be compressed.
2173-- @param dictionary [table] A preset dictionary produced
2174-- by LibDeflate:CreateDictionary()
2175-- @param configs [table/nil] The configuration table to control the compression
2176-- . If nil, use the default configuration.
2177-- @return [string] The compressed data.
2178-- @return [integer] The number of bits padded at the end of output.
2179-- Should always be 0.
2180-- Zlib formatted compressed data never has padding bits at the end.
2181-- @see compression_configs
2182-- @see LibDeflate:CreateDictionary
2183-- @see LibDeflate:DecompressZlibWithDict
2184function LibDeflate:CompressZlibWithDict(str, dictionary, configs)
2185 local arg_valid, arg_err = IsValidArguments(str, true, dictionary
2186 , true, configs)
2187 if not arg_valid then
2188 error(("Usage: LibDeflate:CompressZlibWithDict"
2189 .."(str, dictionary, configs): "
2190 ..arg_err), 2)
2191 end
2192 return CompressZlibInternal(str, dictionary, configs)
2193end
2194
2195local function time()
2196 if os.epoch ~= nil then return math.floor(os.epoch("utc") / 1000)
2197 -- ComputerCraft's os.time() gives in-game time, os.epoch gives POSIX time in ms
2198 elseif os.time() < 30 then return 0 -- ComputerCraft 1.79 and below don't have os.epoch(), so no time.
2199 else return os.time() end -- All other Luas.
2200end
2201
2202local function byte(num, b) return band(rshift(num, b * 8), 0xFF) end
2203
2204--- Compress using the gzip format.
2205-- @param str [string] the data to be compressed.
2206-- @param configs [table/nil] The configuration table to control the compression
2207-- . If nil, use the default configuration.
2208-- @return [string] The compressed data with gzip headers.
2209-- @see compression_configs
2210-- @see LibDeflate:DecompressGzip
2211function LibDeflate:CompressGzip(str, configs)
2212 local arg_valid, arg_err = IsValidArguments(str, false, nil, true, configs)
2213 if not arg_valid then
2214 error(("Usage: LibDeflate:CompressGzip(str, configs): "
2215 ..arg_err), 2)
2216 end
2217 local res, err = CompressDeflateInternal(str, nil, configs)
2218 if res == nil then return res, err end
2219 local t = time()
2220 local cf = 0
2221 local crc = self:CRC32(str)
2222 local len = string.len(str)
2223 if configs ~= nil and configs.level ~= nil then
2224 if configs.level == 0 then cf = 0x04
2225 elseif configs.level == 9 then cf = 0x02 end
2226 end
2227 return string_char(0x1f, 0x8b, 8, 0, byte(t, 0), byte(t, 1), byte(t, 2),
2228 byte(t, 3), cf, 0xFF) .. res .. string_char(byte(crc, 0), byte(crc, 1),
2229 byte(crc, 2), byte(crc, 3), byte(len, 0), byte(len, 1), byte(len, 2), byte(len, 3))
2230end
2231
2232--[[ --------------------------------------------------------------------------
2233 Decompress code
2234--]] --------------------------------------------------------------------------
2235
2236--[[
2237 Create a reader to easily reader stuffs as the unit of bits.
2238 Return values:
2239 1. ReadBits(bitlen)
2240 2. ReadBytes(bytelen, buffer, buffer_size)
2241 3. Decode(huffman_bitlen_count, huffman_symbol, min_bitlen)
2242 4. ReaderBitlenLeft()
2243 5. SkipToByteBoundary()
2244--]]
2245local function CreateReader(input_string)
2246 local input = input_string
2247 local input_strlen = #input_string
2248 local input_next_byte_pos = 1
2249 local cache_bitlen = 0
2250 local cache = 0
2251
2252 -- Read some bits.
2253 -- To improve speed, this function does not
2254 -- check if the input has been exhausted.
2255 -- Use ReaderBitlenLeft() < 0 to check it.
2256 -- @param bitlen the number of bits to read
2257 -- @return the data is read.
2258 local function ReadBits(bitlen)
2259 local rshift_mask = _pow2[bitlen]
2260 local code
2261 if bitlen <= cache_bitlen then
2262 code = cache % rshift_mask
2263 cache = (cache - code) / rshift_mask
2264 cache_bitlen = cache_bitlen - bitlen
2265 else -- Whether input has been exhausted is not checked.
2266 local lshift_mask = _pow2[cache_bitlen]
2267 local byte1, byte2, byte3, byte4 = string_byte(input
2268 , input_next_byte_pos, input_next_byte_pos+3)
2269 -- This requires lua number to be at least double ()
2270 cache = cache + ((byte1 or 0)+(byte2 or 0)*256
2271 + (byte3 or 0)*65536+(byte4 or 0)*16777216)*lshift_mask
2272 input_next_byte_pos = input_next_byte_pos + 4
2273 cache_bitlen = cache_bitlen + 32 - bitlen
2274 code = cache % rshift_mask
2275 cache = (cache - code) / rshift_mask
2276 end
2277 return code
2278 end
2279
2280 -- Read some bytes from the reader.
2281 -- Assume reader is on the byte boundary.
2282 -- @param bytelen The number of bytes to be read.
2283 -- @param buffer The byte read will be stored into this buffer.
2284 -- @param buffer_size The buffer will be modified starting from
2285 -- buffer[buffer_size+1], ending at buffer[buffer_size+bytelen-1]
2286 -- @return the new buffer_size
2287 local function ReadBytes(bytelen, buffer, buffer_size)
2288 assert(cache_bitlen % 8 == 0)
2289
2290 local byte_from_cache = (cache_bitlen/8 < bytelen)
2291 and (cache_bitlen/8) or bytelen
2292 for _=1, byte_from_cache do
2293 local byte = cache % 256
2294 buffer_size = buffer_size + 1
2295 buffer[buffer_size] = string_char(byte)
2296 cache = (cache - byte) / 256
2297 end
2298 cache_bitlen = cache_bitlen - byte_from_cache*8
2299 bytelen = bytelen - byte_from_cache
2300 if (input_strlen - input_next_byte_pos - bytelen + 1) * 8
2301 + cache_bitlen < 0 then
2302 return -1 -- out of input
2303 end
2304 for i=input_next_byte_pos, input_next_byte_pos+bytelen-1 do
2305 buffer_size = buffer_size + 1
2306 buffer[buffer_size] = string_sub(input, i, i)
2307 end
2308
2309 input_next_byte_pos = input_next_byte_pos + bytelen
2310 return buffer_size
2311 end
2312
2313 -- Decode huffman code
2314 -- To improve speed, this function does not check
2315 -- if the input has been exhausted.
2316 -- Use ReaderBitlenLeft() < 0 to check it.
2317 -- Credits for Mark Adler. This code is from puff:Decode()
2318 -- @see puff:Decode(...)
2319 -- @param huffman_bitlen_count
2320 -- @param huffman_symbol
2321 -- @param min_bitlen The minimum huffman bit length of all symbols
2322 -- @return The decoded deflate code.
2323 -- Negative value is returned if decoding fails.
2324 local function Decode(huffman_bitlen_counts, huffman_symbols, min_bitlen)
2325 local code = 0
2326 local first = 0
2327 local index = 0
2328 local count
2329 if min_bitlen > 0 then
2330 if cache_bitlen < 15 and input then
2331 local lshift_mask = _pow2[cache_bitlen]
2332 local byte1, byte2, byte3, byte4 =
2333 string_byte(input, input_next_byte_pos
2334 , input_next_byte_pos+3)
2335 -- This requires lua number to be at least double ()
2336 cache = cache + ((byte1 or 0)+(byte2 or 0)*256
2337 +(byte3 or 0)*65536+(byte4 or 0)*16777216)*lshift_mask
2338 input_next_byte_pos = input_next_byte_pos + 4
2339 cache_bitlen = cache_bitlen + 32
2340 end
2341
2342 local rshift_mask = _pow2[min_bitlen]
2343 cache_bitlen = cache_bitlen - min_bitlen
2344 code = cache % rshift_mask
2345 cache = (cache - code) / rshift_mask
2346 -- Reverse the bits
2347 code = _reverse_bits_tbl[min_bitlen][code]
2348
2349 count = huffman_bitlen_counts[min_bitlen]
2350 if code < count then
2351 return huffman_symbols[code]
2352 end
2353 index = count
2354 first = count * 2
2355 code = code * 2
2356 end
2357
2358 for bitlen = min_bitlen+1, 15 do
2359 local bit
2360 bit = cache % 2
2361 cache = (cache - bit) / 2
2362 cache_bitlen = cache_bitlen - 1
2363
2364 code = (bit==1) and (code + 1 - code % 2) or code
2365 count = huffman_bitlen_counts[bitlen] or 0
2366 local diff = code - first
2367 if diff < count then
2368 return huffman_symbols[index + diff]
2369 end
2370 index = index + count
2371 first = first + count
2372 first = first * 2
2373 code = code * 2
2374 end
2375 -- invalid literal/length or distance code
2376 -- in fixed or dynamic block (run out of code)
2377 return -10
2378 end
2379
2380 local function ReaderBitlenLeft()
2381 return (input_strlen - input_next_byte_pos + 1) * 8 + cache_bitlen
2382 end
2383
2384 local function SkipToByteBoundary()
2385 local skipped_bitlen = cache_bitlen%8
2386 local rshift_mask = _pow2[skipped_bitlen]
2387 cache_bitlen = cache_bitlen - skipped_bitlen
2388 cache = (cache - cache % rshift_mask) / rshift_mask
2389 end
2390
2391 return ReadBits, ReadBytes, Decode, ReaderBitlenLeft, SkipToByteBoundary
2392end
2393
2394-- Create a deflate state, so I can pass in less arguments to functions.
2395-- @param str the whole string to be decompressed.
2396-- @param dictionary The preset dictionary. nil if not provided.
2397-- This dictionary should be produced by LibDeflate:CreateDictionary(str)
2398-- @return The decomrpess state.
2399local function CreateDecompressState(str, dictionary)
2400 local ReadBits, ReadBytes, Decode, ReaderBitlenLeft
2401 , SkipToByteBoundary = CreateReader(str)
2402 local state =
2403 {
2404 ReadBits = ReadBits,
2405 ReadBytes = ReadBytes,
2406 Decode = Decode,
2407 ReaderBitlenLeft = ReaderBitlenLeft,
2408 SkipToByteBoundary = SkipToByteBoundary,
2409 buffer_size = 0,
2410 buffer = {},
2411 result_buffer = {},
2412 dictionary = dictionary,
2413 }
2414 return state
2415end
2416
2417-- Get the stuffs needed to decode huffman codes
2418-- @see puff.c:construct(...)
2419-- @param huffman_bitlen The huffman bit length of the huffman codes.
2420-- @param max_symbol The maximum symbol
2421-- @param max_bitlen The min huffman bit length of all codes
2422-- @return zero or positive for success, negative for failure.
2423-- @return The count of each huffman bit length.
2424-- @return A table to convert huffman codes to deflate codes.
2425-- @return The minimum huffman bit length.
2426local function GetHuffmanForDecode(huffman_bitlens, max_symbol, max_bitlen)
2427 local huffman_bitlen_counts = {}
2428 local min_bitlen = max_bitlen
2429 for symbol = 0, max_symbol do
2430 local bitlen = huffman_bitlens[symbol] or 0
2431 min_bitlen = (bitlen > 0 and bitlen < min_bitlen)
2432 and bitlen or min_bitlen
2433 huffman_bitlen_counts[bitlen] = (huffman_bitlen_counts[bitlen] or 0)+1
2434 end
2435
2436 if huffman_bitlen_counts[0] == max_symbol+1 then -- No Codes
2437 return 0, huffman_bitlen_counts, {}, 0 -- Complete, but decode will fail
2438 end
2439
2440 local left = 1
2441 for len = 1, max_bitlen do
2442 left = left * 2
2443 left = left - (huffman_bitlen_counts[len] or 0)
2444 if left < 0 then
2445 return left -- Over-subscribed, return negative
2446 end
2447 end
2448
2449 -- Generate offsets info symbol table for each length for sorting
2450 local offsets = {}
2451 offsets[1] = 0
2452 for len = 1, max_bitlen-1 do
2453 offsets[len + 1] = offsets[len] + (huffman_bitlen_counts[len] or 0)
2454 end
2455
2456 local huffman_symbols = {}
2457 for symbol = 0, max_symbol do
2458 local bitlen = huffman_bitlens[symbol] or 0
2459 if bitlen ~= 0 then
2460 local offset = offsets[bitlen]
2461 huffman_symbols[offset] = symbol
2462 offsets[bitlen] = offsets[bitlen] + 1
2463 end
2464 end
2465
2466 -- Return zero for complete set, positive for incomplete set.
2467 return left, huffman_bitlen_counts, huffman_symbols, min_bitlen
2468end
2469
2470-- Decode a fixed or dynamic huffman blocks, excluding last block identifier
2471-- and block type identifer.
2472-- @see puff.c:codes()
2473-- @param state decompression state that will be modified by this function.
2474-- @see CreateDecompressState
2475-- @param ... Read the source code
2476-- @return 0 on success, other value on failure.
2477local function DecodeUntilEndOfBlock(state, lcodes_huffman_bitlens
2478 , lcodes_huffman_symbols, lcodes_huffman_min_bitlen
2479 , dcodes_huffman_bitlens, dcodes_huffman_symbols
2480 , dcodes_huffman_min_bitlen)
2481 local buffer, buffer_size, ReadBits, Decode, ReaderBitlenLeft
2482 , result_buffer =
2483 state.buffer, state.buffer_size, state.ReadBits, state.Decode
2484 , state.ReaderBitlenLeft, state.result_buffer
2485 local dictionary = state.dictionary
2486 local dict_string_table
2487 local dict_strlen
2488
2489 local buffer_end = 1
2490 if dictionary and not buffer[0] then
2491 -- If there is a dictionary, copy the last 258 bytes into
2492 -- the string_table to make the copy in the main loop quicker.
2493 -- This is done only once per decompression.
2494 dict_string_table = dictionary.string_table
2495 dict_strlen = dictionary.strlen
2496 buffer_end = -dict_strlen + 1
2497 for i=0, (-dict_strlen+1)<-257 and -257 or (-dict_strlen+1), -1 do
2498 buffer[i] = _byte_to_char[dict_string_table[dict_strlen+i]]
2499 end
2500 end
2501
2502 repeat
2503 local symbol = Decode(lcodes_huffman_bitlens
2504 , lcodes_huffman_symbols, lcodes_huffman_min_bitlen)
2505 if symbol < 0 or symbol > 285 then
2506 -- invalid literal/length or distance code in fixed or dynamic block
2507 return -10
2508 elseif symbol < 256 then -- Literal
2509 buffer_size = buffer_size + 1
2510 buffer[buffer_size] = _byte_to_char[symbol]
2511 elseif symbol > 256 then -- Length code
2512 symbol = symbol - 256
2513 local bitlen = _literal_deflate_code_to_base_len[symbol]
2514 bitlen = (symbol >= 8)
2515 and (bitlen
2516 + ReadBits(_literal_deflate_code_to_extra_bitlen[symbol]))
2517 or bitlen
2518 symbol = Decode(dcodes_huffman_bitlens, dcodes_huffman_symbols
2519 , dcodes_huffman_min_bitlen)
2520 if symbol < 0 or symbol > 29 then
2521 -- invalid literal/length or distance code in fixed or dynamic block
2522 return -10
2523 end
2524 local dist = _dist_deflate_code_to_base_dist[symbol]
2525 dist = (dist > 4) and (dist
2526 + ReadBits(_dist_deflate_code_to_extra_bitlen[symbol])) or dist
2527
2528 local char_buffer_index = buffer_size-dist+1
2529 if char_buffer_index < buffer_end then
2530 -- distance is too far back in fixed or dynamic block
2531 return -11
2532 end
2533 if char_buffer_index >= -257 then
2534 for _=1, bitlen do
2535 buffer_size = buffer_size + 1
2536 buffer[buffer_size] = buffer[char_buffer_index]
2537 char_buffer_index = char_buffer_index + 1
2538 end
2539 else
2540 char_buffer_index = dict_strlen + char_buffer_index
2541 for _=1, bitlen do
2542 buffer_size = buffer_size + 1
2543 buffer[buffer_size] =
2544 _byte_to_char[dict_string_table[char_buffer_index]]
2545 char_buffer_index = char_buffer_index + 1
2546 end
2547 end
2548 end
2549
2550 if ReaderBitlenLeft() < 0 then
2551 return 2 -- available inflate data did not terminate
2552 end
2553
2554 if buffer_size >= 65536 then
2555 result_buffer[#result_buffer+1] =
2556 table_concat(buffer, "", 1, 32768)
2557 for i=32769, buffer_size do
2558 buffer[i-32768] = buffer[i]
2559 end
2560 buffer_size = buffer_size - 32768
2561 buffer[buffer_size+1] = nil
2562 -- NOTE: buffer[32769..end] and buffer[-257..0] are not cleared.
2563 -- This is why "buffer_size" variable is needed.
2564 end
2565 until symbol == 256
2566
2567 state.buffer_size = buffer_size
2568
2569 return 0
2570end
2571
2572-- Decompress a store block
2573-- @param state decompression state that will be modified by this function.
2574-- @return 0 if succeeds, other value if fails.
2575local function DecompressStoreBlock(state)
2576 local buffer, buffer_size, ReadBits, ReadBytes, ReaderBitlenLeft
2577 , SkipToByteBoundary, result_buffer =
2578 state.buffer, state.buffer_size, state.ReadBits, state.ReadBytes
2579 , state.ReaderBitlenLeft, state.SkipToByteBoundary, state.result_buffer
2580
2581 SkipToByteBoundary()
2582 local bytelen = ReadBits(16)
2583 if ReaderBitlenLeft() < 0 then
2584 return 2 -- available inflate data did not terminate
2585 end
2586 local bytelenComp = ReadBits(16)
2587 if ReaderBitlenLeft() < 0 then
2588 return 2 -- available inflate data did not terminate
2589 end
2590
2591 if bytelen % 256 + bytelenComp % 256 ~= 255 then
2592 return -2 -- Not one's complement
2593 end
2594 if (bytelen-bytelen % 256)/256
2595 + (bytelenComp-bytelenComp % 256)/256 ~= 255 then
2596 return -2 -- Not one's complement
2597 end
2598
2599 -- Note that ReadBytes will skip to the next byte boundary first.
2600 buffer_size = ReadBytes(bytelen, buffer, buffer_size)
2601 if buffer_size < 0 then
2602 return 2 -- available inflate data did not terminate
2603 end
2604
2605 -- memory clean up when there are enough bytes in the buffer.
2606 if buffer_size >= 65536 then
2607 result_buffer[#result_buffer+1] = table_concat(buffer, "", 1, 32768)
2608 for i=32769, buffer_size do
2609 buffer[i-32768] = buffer[i]
2610 end
2611 buffer_size = buffer_size - 32768
2612 buffer[buffer_size+1] = nil
2613 end
2614 state.buffer_size = buffer_size
2615 return 0
2616end
2617
2618-- Decompress a fixed block
2619-- @param state decompression state that will be modified by this function.
2620-- @return 0 if succeeds other value if fails.
2621local function DecompressFixBlock(state)
2622 return DecodeUntilEndOfBlock(state
2623 , _fix_block_literal_huffman_bitlen_count
2624 , _fix_block_literal_huffman_to_deflate_code, 7
2625 , _fix_block_dist_huffman_bitlen_count
2626 , _fix_block_dist_huffman_to_deflate_code, 5)
2627end
2628
2629-- Decompress a dynamic block
2630-- @param state decompression state that will be modified by this function.
2631-- @return 0 if success, other value if fails.
2632local function DecompressDynamicBlock(state)
2633 local ReadBits, Decode = state.ReadBits, state.Decode
2634 local nlen = ReadBits(5) + 257
2635 local ndist = ReadBits(5) + 1
2636 local ncode = ReadBits(4) + 4
2637 if nlen > 286 or ndist > 30 then
2638 -- dynamic block code description: too many length or distance codes
2639 return -3
2640 end
2641
2642 local rle_codes_huffman_bitlens = {}
2643
2644 for i = 1, ncode do
2645 rle_codes_huffman_bitlens[_rle_codes_huffman_bitlen_order[i]] =
2646 ReadBits(3)
2647 end
2648
2649 local rle_codes_err, rle_codes_huffman_bitlen_counts,
2650 rle_codes_huffman_symbols, rle_codes_huffman_min_bitlen =
2651 GetHuffmanForDecode(rle_codes_huffman_bitlens, 18, 7)
2652 if rle_codes_err ~= 0 then -- Require complete code set here
2653 -- dynamic block code description: code lengths codes incomplete
2654 return -4
2655 end
2656
2657 local lcodes_huffman_bitlens = {}
2658 local dcodes_huffman_bitlens = {}
2659 -- Read length/literal and distance code length tables
2660 local index = 0
2661 while index < nlen + ndist do
2662 local symbol -- Decoded value
2663 local bitlen -- Last length to repeat
2664
2665 symbol = Decode(rle_codes_huffman_bitlen_counts
2666 , rle_codes_huffman_symbols, rle_codes_huffman_min_bitlen)
2667
2668 if symbol < 0 then
2669 return symbol -- Invalid symbol
2670 elseif symbol < 16 then
2671 if index < nlen then
2672 lcodes_huffman_bitlens[index] = symbol
2673 else
2674 dcodes_huffman_bitlens[index-nlen] = symbol
2675 end
2676 index = index + 1
2677 else
2678 bitlen = 0
2679 if symbol == 16 then
2680 if index == 0 then
2681 -- dynamic block code description: repeat lengths
2682 -- with no first length
2683 return -5
2684 end
2685 if index-1 < nlen then
2686 bitlen = lcodes_huffman_bitlens[index-1]
2687 else
2688 bitlen = dcodes_huffman_bitlens[index-nlen-1]
2689 end
2690 symbol = 3 + ReadBits(2)
2691 elseif symbol == 17 then -- Repeat zero 3..10 times
2692 symbol = 3 + ReadBits(3)
2693 else -- == 18, repeat zero 11.138 times
2694 symbol = 11 + ReadBits(7)
2695 end
2696 if index + symbol > nlen + ndist then
2697 -- dynamic block code description:
2698 -- repeat more than specified lengths
2699 return -6
2700 end
2701 while symbol > 0 do -- Repeat last or zero symbol times
2702 symbol = symbol - 1
2703 if index < nlen then
2704 lcodes_huffman_bitlens[index] = bitlen
2705 else
2706 dcodes_huffman_bitlens[index-nlen] = bitlen
2707 end
2708 index = index + 1
2709 end
2710 end
2711 end
2712
2713 if (lcodes_huffman_bitlens[256] or 0) == 0 then
2714 -- dynamic block code description: missing end-of-block code
2715 return -9
2716 end
2717
2718 local lcodes_err, lcodes_huffman_bitlen_counts
2719 , lcodes_huffman_symbols, lcodes_huffman_min_bitlen =
2720 GetHuffmanForDecode(lcodes_huffman_bitlens, nlen-1, 15)
2721 --dynamic block code description: invalid literal/length code lengths,
2722 -- Incomplete code ok only for single length 1 code
2723 if (lcodes_err ~=0 and (lcodes_err < 0
2724 or nlen ~= (lcodes_huffman_bitlen_counts[0] or 0)
2725 +(lcodes_huffman_bitlen_counts[1] or 0))) then
2726 return -7
2727 end
2728
2729 local dcodes_err, dcodes_huffman_bitlen_counts
2730 , dcodes_huffman_symbols, dcodes_huffman_min_bitlen =
2731 GetHuffmanForDecode(dcodes_huffman_bitlens, ndist-1, 15)
2732 -- dynamic block code description: invalid distance code lengths,
2733 -- Incomplete code ok only for single length 1 code
2734 if (dcodes_err ~=0 and (dcodes_err < 0
2735 or ndist ~= (dcodes_huffman_bitlen_counts[0] or 0)
2736 + (dcodes_huffman_bitlen_counts[1] or 0))) then
2737 return -8
2738 end
2739
2740 -- Build buffman table for literal/length codes
2741 return DecodeUntilEndOfBlock(state, lcodes_huffman_bitlen_counts
2742 , lcodes_huffman_symbols, lcodes_huffman_min_bitlen
2743 , dcodes_huffman_bitlen_counts, dcodes_huffman_symbols
2744 , dcodes_huffman_min_bitlen)
2745end
2746
2747-- Decompress a deflate stream
2748-- @param state: a decompression state
2749-- @return the decompressed string if succeeds. nil if fails.
2750local function Inflate(state)
2751 local ReadBits = state.ReadBits
2752
2753 local is_last_block
2754 while not is_last_block do
2755 is_last_block = (ReadBits(1) == 1)
2756 local block_type = ReadBits(2)
2757 local status
2758 if block_type == 0 then
2759 status = DecompressStoreBlock(state)
2760 elseif block_type == 1 then
2761 status = DecompressFixBlock(state)
2762 elseif block_type == 2 then
2763 status = DecompressDynamicBlock(state)
2764 else
2765 return nil, -1 -- invalid block type (type == 3)
2766 end
2767 if status ~= 0 then
2768 return nil, status
2769 end
2770 -- ComputerCraft requires this for long-running processes
2771 if os and os.pullEvent then
2772 os.queueEvent("nosleep")
2773 os.pullEvent()
2774 end
2775 end
2776
2777 state.result_buffer[#state.result_buffer+1] =
2778 table_concat(state.buffer, "", 1, state.buffer_size)
2779 local result = table_concat(state.result_buffer)
2780 return result
2781end
2782
2783-- @see LibDeflate:DecompressDeflate(str)
2784-- @see LibDeflate:DecompressDeflateWithDict(str, dictionary)
2785local function DecompressDeflateInternal(str, dictionary)
2786 local state = CreateDecompressState(str, dictionary)
2787 local result, status = Inflate(state)
2788 if not result then
2789 return nil, status
2790 end
2791
2792 local bitlen_left = state.ReaderBitlenLeft()
2793 local bytelen_left = (bitlen_left - bitlen_left % 8) / 8
2794 return result, bytelen_left
2795end
2796
2797-- @see LibDeflate:DecompressZlib(str)
2798-- @see LibDeflate:DecompressZlibWithDict(str)
2799local function DecompressZlibInternal(str, dictionary)
2800 local state = CreateDecompressState(str, dictionary)
2801 local ReadBits = state.ReadBits
2802
2803 local CMF = ReadBits(8)
2804 if state.ReaderBitlenLeft() < 0 then
2805 return nil, 2 -- available inflate data did not terminate
2806 end
2807 local CM = CMF % 16
2808 local CINFO = (CMF - CM) / 16
2809 if CM ~= 8 then
2810 return nil, -12 -- invalid compression method
2811 end
2812 if CINFO > 7 then
2813 return nil, -13 -- invalid window size
2814 end
2815
2816 local FLG = ReadBits(8)
2817 if state.ReaderBitlenLeft() < 0 then
2818 return nil, 2 -- available inflate data did not terminate
2819 end
2820 if (CMF*256+FLG)%31 ~= 0 then
2821 return nil, -14 -- invalid header checksum
2822 end
2823
2824 local FDIST = ((FLG-FLG%32)/32 % 2)
2825 local FLEVEL = ((FLG-FLG%64)/64 % 4) -- luacheck: ignore FLEVEL
2826
2827 if FDIST == 1 then
2828 if not dictionary then
2829 return nil, -16 -- need dictonary, but dictionary is not provided.
2830 end
2831 local byte3 = ReadBits(8)
2832 local byte2 = ReadBits(8)
2833 local byte1 = ReadBits(8)
2834 local byte0 = ReadBits(8)
2835 local actual_adler32 = byte3*16777216+byte2*65536+byte1*256+byte0
2836 if state.ReaderBitlenLeft() < 0 then
2837 return nil, 2 -- available inflate data did not terminate
2838 end
2839 if not IsEqualAdler32(actual_adler32, dictionary.adler32) then
2840 return nil, -17 -- dictionary adler32 does not match
2841 end
2842 end
2843 local result, status = Inflate(state)
2844 if not result then
2845 return nil, status
2846 end
2847 state.SkipToByteBoundary()
2848
2849 local adler_byte0 = ReadBits(8)
2850 local adler_byte1 = ReadBits(8)
2851 local adler_byte2 = ReadBits(8)
2852 local adler_byte3 = ReadBits(8)
2853 if state.ReaderBitlenLeft() < 0 then
2854 return nil, 2 -- available inflate data did not terminate
2855 end
2856
2857 local adler32_expected = adler_byte0*16777216
2858 + adler_byte1*65536 + adler_byte2*256 + adler_byte3
2859 local adler32_actual = LibDeflate:Adler32(result)
2860 if not IsEqualAdler32(adler32_expected, adler32_actual) then
2861 return nil, -15 -- Adler32 checksum does not match
2862 end
2863
2864 local bitlen_left = state.ReaderBitlenLeft()
2865 local bytelen_left = (bitlen_left - bitlen_left % 8) / 8
2866 return result, bytelen_left
2867end
2868
2869--- Decompress a raw deflate compressed data.
2870-- @param str [string] The data to be decompressed.
2871-- @return [string/nil] If the decompression succeeds, return the decompressed
2872-- data. If the decompression fails, return nil. You should check if this return
2873-- value is non-nil to know if the decompression succeeds.
2874-- @return [integer] If the decompression succeeds, return the number of
2875-- unprocessed bytes in the input compressed data. This return value is a
2876-- positive integer if the input data is a valid compressed data appended by an
2877-- arbitary non-empty string. This return value is 0 if the input data does not
2878-- contain any extra bytes.<br>
2879-- If the decompression fails (The first return value of this function is nil),
2880-- this return value is undefined.
2881-- @see LibDeflate:CompressDeflate
2882function LibDeflate:DecompressDeflate(str)
2883 local arg_valid, arg_err = IsValidArguments(str)
2884 if not arg_valid then
2885 error(("Usage: LibDeflate:DecompressDeflate(str): "
2886 ..arg_err), 2)
2887 end
2888 return DecompressDeflateInternal(str)
2889end
2890
2891--- Decompress a raw deflate compressed data with a preset dictionary.
2892-- @param str [string] The data to be decompressed.
2893-- @param dictionary [table] The preset dictionary used by
2894-- LibDeflate:CompressDeflateWithDict when the compressed data is produced.
2895-- Decompression and compression must use the same dictionary.
2896-- Otherwise wrong decompressed data could be produced without generating any
2897-- error.
2898-- @return [string/nil] If the decompression succeeds, return the decompressed
2899-- data. If the decompression fails, return nil. You should check if this return
2900-- value is non-nil to know if the decompression succeeds.
2901-- @return [integer] If the decompression succeeds, return the number of
2902-- unprocessed bytes in the input compressed data. This return value is a
2903-- positive integer if the input data is a valid compressed data appended by an
2904-- arbitary non-empty string. This return value is 0 if the input data does not
2905-- contain any extra bytes.<br>
2906-- If the decompression fails (The first return value of this function is nil),
2907-- this return value is undefined.
2908-- @see LibDeflate:CompressDeflateWithDict
2909function LibDeflate:DecompressDeflateWithDict(str, dictionary)
2910 local arg_valid, arg_err = IsValidArguments(str, true, dictionary)
2911 if not arg_valid then
2912 error(("Usage: LibDeflate:DecompressDeflateWithDict(str, dictionary): "
2913 ..arg_err), 2)
2914 end
2915 return DecompressDeflateInternal(str, dictionary)
2916end
2917
2918--- Decompress a zlib compressed data.
2919-- @param str [string] The data to be decompressed
2920-- @return [string/nil] If the decompression succeeds, return the decompressed
2921-- data. If the decompression fails, return nil. You should check if this return
2922-- value is non-nil to know if the decompression succeeds.
2923-- @return [integer] If the decompression succeeds, return the number of
2924-- unprocessed bytes in the input compressed data. This return value is a
2925-- positive integer if the input data is a valid compressed data appended by an
2926-- arbitary non-empty string. This return value is 0 if the input data does not
2927-- contain any extra bytes.<br>
2928-- If the decompression fails (The first return value of this function is nil),
2929-- this return value is undefined.
2930-- @see LibDeflate:CompressZlib
2931function LibDeflate:DecompressZlib(str)
2932 local arg_valid, arg_err = IsValidArguments(str)
2933 if not arg_valid then
2934 error(("Usage: LibDeflate:DecompressZlib(str): "
2935 ..arg_err), 2)
2936 end
2937 return DecompressZlibInternal(str)
2938end
2939
2940--- Decompress a zlib compressed data with a preset dictionary.
2941-- @param str [string] The data to be decompressed
2942-- @param dictionary [table] The preset dictionary used by
2943-- LibDeflate:CompressDeflateWithDict when the compressed data is produced.
2944-- Decompression and compression must use the same dictionary.
2945-- Otherwise wrong decompressed data could be produced without generating any
2946-- error.
2947-- @return [string/nil] If the decompression succeeds, return the decompressed
2948-- data. If the decompression fails, return nil. You should check if this return
2949-- value is non-nil to know if the decompression succeeds.
2950-- @return [integer] If the decompression succeeds, return the number of
2951-- unprocessed bytes in the input compressed data. This return value is a
2952-- positive integer if the input data is a valid compressed data appended by an
2953-- arbitary non-empty string. This return value is 0 if the input data does not
2954-- contain any extra bytes.<br>
2955-- If the decompression fails (The first return value of this function is nil),
2956-- this return value is undefined.
2957-- @see LibDeflate:CompressZlibWithDict
2958function LibDeflate:DecompressZlibWithDict(str, dictionary)
2959 local arg_valid, arg_err = IsValidArguments(str, true, dictionary)
2960 if not arg_valid then
2961 error(("Usage: LibDeflate:DecompressZlibWithDict(str, dictionary): "
2962 ..arg_err), 2)
2963 end
2964 return DecompressZlibInternal(str, dictionary)
2965end
2966
2967-- Calculate the huffman code of fixed block
2968do
2969 _fix_block_literal_huffman_bitlen = {}
2970 for sym=0, 143 do
2971 _fix_block_literal_huffman_bitlen[sym] = 8
2972 end
2973 for sym=144, 255 do
2974 _fix_block_literal_huffman_bitlen[sym] = 9
2975 end
2976 for sym=256, 279 do
2977 _fix_block_literal_huffman_bitlen[sym] = 7
2978 end
2979 for sym=280, 287 do
2980 _fix_block_literal_huffman_bitlen[sym] = 8
2981 end
2982
2983 _fix_block_dist_huffman_bitlen = {}
2984 for dist=0, 31 do
2985 _fix_block_dist_huffman_bitlen[dist] = 5
2986 end
2987 local status
2988 status, _fix_block_literal_huffman_bitlen_count
2989 , _fix_block_literal_huffman_to_deflate_code =
2990 GetHuffmanForDecode(_fix_block_literal_huffman_bitlen, 287, 9)
2991 assert(status == 0)
2992 status, _fix_block_dist_huffman_bitlen_count,
2993 _fix_block_dist_huffman_to_deflate_code =
2994 GetHuffmanForDecode(_fix_block_dist_huffman_bitlen, 31, 5)
2995 assert(status == 0)
2996
2997 _fix_block_literal_huffman_code =
2998 GetHuffmanCodeFromBitlen(_fix_block_literal_huffman_bitlen_count
2999 , _fix_block_literal_huffman_bitlen, 287, 9)
3000 _fix_block_dist_huffman_code =
3001 GetHuffmanCodeFromBitlen(_fix_block_dist_huffman_bitlen_count
3002 , _fix_block_dist_huffman_bitlen, 31, 5)
3003end
3004
3005--- Decompress a gzip compressed data.
3006-- @param str [string] The data to be decompressed
3007-- @return [string/nil] If the decompression succeeds, return the decompressed
3008-- data. If the decompression fails, return nil. You should check if this return
3009-- value is non-nil to know if the decompression succeeds.
3010-- @return [integer] If the decompression succeeds, return the number of
3011-- unprocessed bytes in the input compressed data. This return value is a
3012-- positive integer if the input data is a valid compressed data appended by an
3013-- arbitary non-empty string. This return value is 0 if the input data does not
3014-- contain any extra bytes.<br>
3015-- If the decompression fails (The first return value of this function is nil),
3016-- this return value is undefined.
3017function LibDeflate:DecompressGzip(str)
3018 local arg_valid, arg_err = IsValidArguments(str)
3019 if not arg_valid then
3020 error(("Usage: LibDeflate:DecompressGzip(str): "..arg_err), 2)
3021 end
3022 if string_byte(string.sub(str, 1, 1)) ~= 31 or string_byte(string.sub(str, 2, 2)) ~= 139 then
3023 return nil, -1
3024 end
3025 if band(string_byte(string.sub(str, 4, 4)), 0xE0) ~= 0 then
3026 return nil, -3
3027 end
3028 if string_byte(string.sub(str, 3, 3)) ~= 8 then
3029 return nil, -4
3030 end
3031 local offset = 10
3032 if band(string_byte(string.sub(str, 4, 4)), 4) == 4 then
3033 offset = offset + string_byte(string.sub(str, 11, 11)) * 256 + string_byte(string.sub(str, 12, 12))
3034 end
3035 if band(string_byte(string.sub(str, 4, 4)), 8) == 8 then
3036 while string_byte(string.sub(str, offset, offset)) ~= 0 do offset = offset + 1 end
3037 end
3038 if band(string_byte(string.sub(str, 4, 4)), 16) == 16 then
3039 while string_byte(string.sub(str, offset, offset)) ~= 0 do offset = offset + 1 end
3040 end
3041 if band(string_byte(string.sub(str, 4, 4)), 2) == 2 then
3042 local src_checksum = string_byte(string.sub(str, offset + 1, offset + 1)) * 256 + string_byte(string.sub(str, offset, offset))
3043 local target_checksum = band(self:CRC32(string.sub(str, 1, offset - 1)), 0xFFFF)
3044 if xor(src_checksum, target_checksum) ~= 0xFFFF then return nil, -5 end
3045 offset = offset + 2
3046 end
3047 local res, err = DecompressDeflateInternal(string.sub(str, offset + 1, -8))
3048 if res == nil then return res, err end
3049 local src_checksum = string_byte(string.sub(str, -5, -5)) * 0x1000000 + string_byte(string.sub(str, -6, -6)) * 0x10000 + string_byte(string.sub(str, -7, -7)) * 256 + string_byte(string.sub(str, -8, -8))
3050 src_checksum = bnot(src_checksum)
3051 local target_checksum = self:CRC32(res)
3052 if xor(src_checksum, target_checksum) ~= 0xFFFFFFFF then return nil, -2 end
3053 return res
3054end
3055
3056-- Encoding algorithms
3057-- Prefix encoding algorithm
3058-- implemented by Galmok of European Stormrage (Horde), galmok@gmail.com
3059-- From LibCompress <https://www.wowace.com/projects/libcompress>,
3060-- which is licensed under GPLv2
3061-- The code has been modified by the author of LibDeflate.
3062------------------------------------------------------------------------------
3063
3064-- to be able to match any requested byte value, the search
3065-- string must be preprocessed characters to escape with %:
3066-- ( ) . % + - * ? [ ] ^ $
3067-- "illegal" byte values:
3068-- 0 is replaces %z
3069local _gsub_escape_table = {
3070 ["\000"] = "%z", ["("] = "%(", [")"] = "%)", ["."] = "%.",
3071 ["%"] = "%%", ["+"] = "%+", ["-"] = "%-", ["*"] = "%*",
3072 ["?"] = "%?", ["["] = "%[", ["]"] = "%]", ["^"] = "%^",
3073 ["$"] = "%$",
3074}
3075
3076local function escape_for_gsub(str)
3077 return str:gsub("([%z%(%)%.%%%+%-%*%?%[%]%^%$])", _gsub_escape_table)
3078end
3079
3080--- Create a custom codec with encoder and decoder. <br>
3081-- This codec is used to convert an input string to make it not contain
3082-- some specific bytes.
3083-- This created codec and the parameters of this function do NOT take
3084-- localization into account. One byte (0-255) in the string is exactly one
3085-- character (0-255).
3086-- Credits to LibCompress.
3087-- @param reserved_chars [string] The created encoder will ensure encoded
3088-- data does not contain any single character in reserved_chars. This parameter
3089-- should be non-empty.
3090-- @param escape_chars [string] The escape character(s) used in the created
3091-- codec. The codec converts any character included in reserved\_chars /
3092-- escape\_chars / map\_chars to (one escape char + one character not in
3093-- reserved\_chars / escape\_chars / map\_chars).
3094-- You usually only need to provide a length-1 string for this parameter.
3095-- Length-2 string is only needed when
3096-- reserved\_chars + escape\_chars + map\_chars is longer than 127.
3097-- This parameter should be non-empty.
3098-- @param map_chars [string] The created encoder will map every
3099-- reserved\_chars:sub(i, i) (1 <= i <= #map\_chars) to map\_chars:sub(i, i).
3100-- This parameter CAN be empty string.
3101-- @return [table/nil] If the codec cannot be created, return nil.<br>
3102-- If the codec can be created according to the given
3103-- parameters, return the codec, which is a encode/decode table.
3104-- The table contains two functions: <br>
3105-- t:Encode(str) returns the encoded string. <br>
3106-- t:Decode(str) returns the decoded string if succeeds. nil if fails.
3107-- @return [nil/string] If the codec is successfully created, return nil.
3108-- If not, return a string that describes the reason why the codec cannot be
3109-- created.
3110-- @usage
3111-- -- Create an encoder/decoder that maps all "\000" to "\003",
3112-- -- and escape "\001" (and "\002" and "\003") properly
3113-- local codec = LibDeflate:CreateCodec("\000\001", "\002", "\003")
3114--
3115-- local encoded = codec:Encode(SOME_STRING)
3116-- -- "encoded" does not contain "\000" or "\001"
3117-- local decoded = codec:Decode(encoded)
3118-- -- assert(decoded == SOME_STRING)
3119function LibDeflate:CreateCodec(reserved_chars, escape_chars
3120 , map_chars)
3121 -- select a default escape character
3122 if type(reserved_chars) ~= "string"
3123 or type(escape_chars) ~= "string"
3124 or type(map_chars) ~= "string" then
3125 error(
3126 "Usage: LibDeflate:CreateCodec(reserved_chars,"
3127 .." escape_chars, map_chars):"
3128 .." All arguments must be string.", 2)
3129 end
3130
3131 if escape_chars == "" then
3132 return nil, "No escape characters supplied."
3133 end
3134 if #reserved_chars < #map_chars then
3135 return nil, "The number of reserved characters must be"
3136 .." at least as many as the number of mapped chars."
3137 end
3138 if reserved_chars == "" then
3139 return nil, "No characters to encode."
3140 end
3141
3142 local encode_bytes = reserved_chars..escape_chars..map_chars
3143 -- build list of bytes not available as a suffix to a prefix byte
3144 local taken = {}
3145 for i = 1, #encode_bytes do
3146 local byte = string_byte(encode_bytes, i, i)
3147 if taken[byte] then -- Modified by LibDeflate:
3148 return nil, "There must be no duplicate characters in the"
3149 .." concatenation of reserved_chars, escape_chars and"
3150 .." map_chars."
3151 end
3152 taken[byte] = true
3153 end
3154
3155 -- Modified by LibDeflate:
3156 -- Store the patterns and replacement in tables for later use.
3157 -- This function is modified that loadstring() lua api is no longer used.
3158 local decode_patterns = {}
3159 local decode_repls = {}
3160
3161 -- the encoding can be a single gsub
3162 -- , but the decoding can require multiple gsubs
3163 local encode_search = {}
3164 local encode_translate = {}
3165
3166 -- map single byte to single byte
3167 if #map_chars > 0 then
3168 local decode_search = {}
3169 local decode_translate = {}
3170 for i = 1, #map_chars do
3171 local from = string_sub(reserved_chars, i, i)
3172 local to = string_sub(map_chars, i, i)
3173 encode_translate[from] = to
3174 encode_search[#encode_search+1] = from
3175 decode_translate[to] = from
3176 decode_search[#decode_search+1] = to
3177 end
3178 decode_patterns[#decode_patterns+1] =
3179 "([".. escape_for_gsub(table_concat(decode_search)).."])"
3180 decode_repls[#decode_repls+1] = decode_translate
3181 end
3182
3183 local escape_char_index = 1
3184 local escape_char = string_sub(escape_chars
3185 , escape_char_index, escape_char_index)
3186 -- map single byte to double-byte
3187 local r = 0 -- suffix char value to the escapeChar
3188
3189 local decode_search = {}
3190 local decode_translate = {}
3191 for i = 1, #encode_bytes do
3192 local c = string_sub(encode_bytes, i, i)
3193 if not encode_translate[c] then
3194 -- this loop will update escapeChar and r
3195 while r >= 256 or taken[r] do
3196 -- Bug in LibCompress r81
3197 -- while r < 256 and taken[r] do
3198 r = r + 1
3199 if r > 255 then -- switch to next escapeChar
3200 decode_patterns[#decode_patterns+1] =
3201 escape_for_gsub(escape_char)
3202 .."(["
3203 .. escape_for_gsub(table_concat(decode_search)).."])"
3204 decode_repls[#decode_repls+1] = decode_translate
3205
3206 escape_char_index = escape_char_index + 1
3207 escape_char = string_sub(escape_chars, escape_char_index
3208 , escape_char_index)
3209 r = 0
3210 decode_search = {}
3211 decode_translate = {}
3212
3213 -- Fixes Another bug in LibCompress r82.
3214 -- LibCompress checks this error condition
3215 -- right after "if r > 255 then"
3216 -- This is why error case should also be tested.
3217 if not escape_char or escape_char == "" then
3218 -- actually I don't need to check
3219 -- "not ecape_char", but what if Lua changes
3220 -- the behavior of string.sub() in the future?
3221 -- we are out of escape chars and we need more!
3222 return nil, "Out of escape characters."
3223 end
3224 end
3225 end
3226
3227 local char_r = _byte_to_char[r]
3228 encode_translate[c] = escape_char..char_r
3229 encode_search[#encode_search+1] = c
3230 decode_translate[char_r] = c
3231 decode_search[#decode_search+1] = char_r
3232 r = r + 1
3233 end
3234 if i == #encode_bytes then
3235 decode_patterns[#decode_patterns+1] =
3236 escape_for_gsub(escape_char).."(["
3237 .. escape_for_gsub(table_concat(decode_search)).."])"
3238 decode_repls[#decode_repls+1] = decode_translate
3239 end
3240 end
3241
3242 local codec = {}
3243
3244 local encode_pattern = "(["
3245 .. escape_for_gsub(table_concat(encode_search)).."])"
3246 local encode_repl = encode_translate
3247
3248 function codec:Encode(str)
3249 if type(str) ~= "string" then
3250 error(("Usage: codec:Encode(str):"
3251 .." 'str' - string expected got '%s'."):format(type(str)), 2)
3252 end
3253 return string_gsub(str, encode_pattern, encode_repl)
3254 end
3255
3256 local decode_tblsize = #decode_patterns
3257 local decode_fail_pattern = "(["
3258 .. escape_for_gsub(reserved_chars).."])"
3259
3260 function codec:Decode(str)
3261 if type(str) ~= "string" then
3262 error(("Usage: codec:Decode(str):"
3263 .." 'str' - string expected got '%s'."):format(type(str)), 2)
3264 end
3265 if string_find(str, decode_fail_pattern) then
3266 return nil
3267 end
3268 for i = 1, decode_tblsize do
3269 str = string_gsub(str, decode_patterns[i], decode_repls[i])
3270 end
3271 return str
3272 end
3273
3274 return codec
3275end
3276
3277local _addon_channel_codec
3278
3279local function GenerateWoWAddonChannelCodec()
3280 return LibDeflate:CreateCodec("\000", "\001", "")
3281end
3282
3283--- Encode the string to make it ready to be transmitted in World of
3284-- Warcraft addon channel. <br>
3285-- The encoded string is guaranteed to contain no NULL ("\000") character.
3286-- @param str [string] The string to be encoded.
3287-- @return The encoded string.
3288-- @see LibDeflate:DecodeForWoWAddonChannel
3289function LibDeflate:EncodeForWoWAddonChannel(str)
3290 if type(str) ~= "string" then
3291 error(("Usage: LibDeflate:EncodeForWoWAddonChannel(str):"
3292 .." 'str' - string expected got '%s'."):format(type(str)), 2)
3293 end
3294 if not _addon_channel_codec then
3295 _addon_channel_codec = GenerateWoWAddonChannelCodec()
3296 end
3297 return _addon_channel_codec:Encode(str)
3298end
3299
3300--- Decode the string produced by LibDeflate:EncodeForWoWAddonChannel
3301-- @param str [string] The string to be decoded.
3302-- @return [string/nil] The decoded string if succeeds. nil if fails.
3303-- @see LibDeflate:EncodeForWoWAddonChannel
3304function LibDeflate:DecodeForWoWAddonChannel(str)
3305 if type(str) ~= "string" then
3306 error(("Usage: LibDeflate:DecodeForWoWAddonChannel(str):"
3307 .." 'str' - string expected got '%s'."):format(type(str)), 2)
3308 end
3309 if not _addon_channel_codec then
3310 _addon_channel_codec = GenerateWoWAddonChannelCodec()
3311 end
3312 return _addon_channel_codec:Decode(str)
3313end
3314
3315-- For World of Warcraft Chat Channel Encoding
3316-- implemented by Galmok of European Stormrage (Horde), galmok@gmail.com
3317-- From LibCompress <https://www.wowace.com/projects/libcompress>,
3318-- which is licensed under GPLv2
3319-- The code has been modified by the author of LibDeflate.
3320-- Following byte values are not allowed:
3321-- \000, s, S, \010, \013, \124, %
3322-- Because SendChatMessage will error
3323-- if an UTF8 multibyte character is incomplete,
3324-- all character values above 127 have to be encoded to avoid this.
3325-- This costs quite a bit of bandwidth (about 13-14%)
3326-- Also, because drunken status is unknown for the received
3327-- , strings used with SendChatMessage should be terminated with
3328-- an identifying byte value, after which the server MAY add "...hic!"
3329-- or as much as it can fit(!).
3330-- Pass the identifying byte as a reserved character to this function
3331-- to ensure the encoding doesn't contain that value.
3332-- or use this: local message, match = arg1:gsub("^(.*)\029.-$", "%1")
3333-- arg1 is message from channel, \029 is the string terminator
3334-- , but may be used in the encoded datastream as well. :-)
3335-- This encoding will expand data anywhere from:
3336-- 0% (average with pure ascii text)
3337-- 53.5% (average with random data valued zero to 255)
3338-- 100% (only encoding data that encodes to two bytes)
3339local function GenerateWoWChatChannelCodec()
3340 local r = {}
3341 for i = 128, 255 do
3342 r[#r+1] = _byte_to_char[i]
3343 end
3344
3345 local reserved_chars = "sS\000\010\013\124%"..table_concat(r)
3346 return LibDeflate:CreateCodec(reserved_chars
3347 , "\029\031", "\015\020")
3348end
3349
3350local _chat_channel_codec
3351
3352--- Encode the string to make it ready to be transmitted in World of
3353-- Warcraft chat channel. <br>
3354-- See also https://wow.gamepedia.com/ValidChatMessageCharacters
3355-- @param str [string] The string to be encoded.
3356-- @return [string] The encoded string.
3357-- @see LibDeflate:DecodeForWoWChatChannel
3358function LibDeflate:EncodeForWoWChatChannel(str)
3359 if type(str) ~= "string" then
3360 error(("Usage: LibDeflate:EncodeForWoWChatChannel(str):"
3361 .." 'str' - string expected got '%s'."):format(type(str)), 2)
3362 end
3363 if not _chat_channel_codec then
3364 _chat_channel_codec = GenerateWoWChatChannelCodec()
3365 end
3366 return _chat_channel_codec:Encode(str)
3367end
3368
3369--- Decode the string produced by LibDeflate:EncodeForWoWChatChannel.
3370-- @param str [string] The string to be decoded.
3371-- @return [string/nil] The decoded string if succeeds. nil if fails.
3372-- @see LibDeflate:EncodeForWoWChatChannel
3373function LibDeflate:DecodeForWoWChatChannel(str)
3374 if type(str) ~= "string" then
3375 error(("Usage: LibDeflate:DecodeForWoWChatChannel(str):"
3376 .." 'str' - string expected got '%s'."):format(type(str)), 2)
3377 end
3378 if not _chat_channel_codec then
3379 _chat_channel_codec = GenerateWoWChatChannelCodec()
3380 end
3381 return _chat_channel_codec:Decode(str)
3382end
3383
3384-- Credits to WeakAuras <https://github.com/WeakAuras/WeakAuras2>,
3385-- and Galmok (galmok@gmail.com) for the 6 bit encoding algorithm.
3386-- The result of encoding will be 25% larger than the
3387-- origin string, but every single byte of the encoding result will be
3388-- printable characters as the following.
3389local _byte_to_6bit_char = {
3390 [0]="a", "b", "c", "d", "e", "f", "g", "h",
3391 "i", "j", "k", "l", "m", "n", "o", "p",
3392 "q", "r", "s", "t", "u", "v", "w", "x",
3393 "y", "z", "A", "B", "C", "D", "E", "F",
3394 "G", "H", "I", "J", "K", "L", "M", "N",
3395 "O", "P", "Q", "R", "S", "T", "U", "V",
3396 "W", "X", "Y", "Z", "0", "1", "2", "3",
3397 "4", "5", "6", "7", "8", "9", "(", ")",
3398}
3399
3400local _6bit_to_byte = {
3401 [97]=0,[98]=1,[99]=2,[100]=3,[101]=4,[102]=5,[103]=6,[104]=7,
3402 [105]=8,[106]=9,[107]=10,[108]=11,[109]=12,[110]=13,[111]=14,[112]=15,
3403 [113]=16,[114]=17,[115]=18,[116]=19,[117]=20,[118]=21,[119]=22,[120]=23,
3404 [121]=24,[122]=25,[65]=26,[66]=27,[67]=28,[68]=29,[69]=30,[70]=31,
3405 [71]=32,[72]=33,[73]=34,[74]=35,[75]=36,[76]=37,[77]=38,[78]=39,
3406 [79]=40,[80]=41,[81]=42,[82]=43,[83]=44,[84]=45,[85]=46,[86]=47,
3407 [87]=48,[88]=49,[89]=50,[90]=51,[48]=52,[49]=53,[50]=54,[51]=55,
3408 [52]=56,[53]=57,[54]=58,[55]=59,[56]=60,[57]=61,[40]=62,[41]=63,
3409}
3410
3411--- Encode the string to make it printable. <br>
3412--
3413-- Credis to WeakAuras2, this function is equivalant to the implementation
3414-- it is using right now. <br>
3415-- The encoded string will be 25% larger than the origin string. However, every
3416-- single byte of the encoded string will be one of 64 printable ASCII
3417-- characters, which are can be easier copied, pasted and displayed.
3418-- (26 lowercase letters, 26 uppercase letters, 10 numbers digits,
3419-- left parenthese, or right parenthese)
3420-- @param str [string] The string to be encoded.
3421-- @return [string] The encoded string.
3422function LibDeflate:EncodeForPrint(str)
3423 if type(str) ~= "string" then
3424 error(("Usage: LibDeflate:EncodeForPrint(str):"
3425 .." 'str' - string expected got '%s'."):format(type(str)), 2)
3426 end
3427 local strlen = #str
3428 local strlenMinus2 = strlen - 2
3429 local i = 1
3430 local buffer = {}
3431 local buffer_size = 0
3432 while i <= strlenMinus2 do
3433 local x1, x2, x3 = string_byte(str, i, i+2)
3434 i = i + 3
3435 local cache = x1+x2*256+x3*65536
3436 local b1 = cache % 64
3437 cache = (cache - b1) / 64
3438 local b2 = cache % 64
3439 cache = (cache - b2) / 64
3440 local b3 = cache % 64
3441 local b4 = (cache - b3) / 64
3442 buffer_size = buffer_size + 1
3443 buffer[buffer_size] =
3444 _byte_to_6bit_char[b1].._byte_to_6bit_char[b2]
3445 .._byte_to_6bit_char[b3].._byte_to_6bit_char[b4]
3446 end
3447
3448 local cache = 0
3449 local cache_bitlen = 0
3450 while i <= strlen do
3451 local x = string_byte(str, i, i)
3452 cache = cache + x * _pow2[cache_bitlen]
3453 cache_bitlen = cache_bitlen + 8
3454 i = i + 1
3455 end
3456 while cache_bitlen > 0 do
3457 local bit6 = cache % 64
3458 buffer_size = buffer_size + 1
3459 buffer[buffer_size] = _byte_to_6bit_char[bit6]
3460 cache = (cache - bit6) / 64
3461 cache_bitlen = cache_bitlen - 6
3462 end
3463
3464 return table_concat(buffer)
3465end
3466
3467--- Decode the printable string produced by LibDeflate:EncodeForPrint.
3468-- "str" will have its prefixed and trailing control characters or space
3469-- removed before it is decoded, so it is easier to use if "str" comes form
3470-- user copy and paste with some prefixed or trailing spaces.
3471-- Then decode fails if the string contains any characters cant be produced by
3472-- LibDeflate:EncodeForPrint. That means, decode fails if the string contains a
3473-- characters NOT one of 26 lowercase letters, 26 uppercase letters,
3474-- 10 numbers digits, left parenthese, or right parenthese.
3475-- @param str [string] The string to be decoded
3476-- @return [string/nil] The decoded string if succeeds. nil if fails.
3477function LibDeflate:DecodeForPrint(str)
3478 if type(str) ~= "string" then
3479 error(("Usage: LibDeflate:DecodeForPrint(str):"
3480 .." 'str' - string expected got '%s'."):format(type(str)), 2)
3481 end
3482 str = str:gsub("^[%c ]+", "")
3483 str = str:gsub("[%c ]+$", "")
3484
3485 local strlen = #str
3486 if strlen == 1 then
3487 return nil
3488 end
3489 local strlenMinus3 = strlen - 3
3490 local i = 1
3491 local buffer = {}
3492 local buffer_size = 0
3493 while i <= strlenMinus3 do
3494 local x1, x2, x3, x4 = string_byte(str, i, i+3)
3495 x1 = _6bit_to_byte[x1]
3496 x2 = _6bit_to_byte[x2]
3497 x3 = _6bit_to_byte[x3]
3498 x4 = _6bit_to_byte[x4]
3499 if not (x1 and x2 and x3 and x4) then
3500 return nil
3501 end
3502 i = i + 4
3503 local cache = x1+x2*64+x3*4096+x4*262144
3504 local b1 = cache % 256
3505 cache = (cache - b1) / 256
3506 local b2 = cache % 256
3507 local b3 = (cache - b2) / 256
3508 buffer_size = buffer_size + 1
3509 buffer[buffer_size] =
3510 _byte_to_char[b1].._byte_to_char[b2].._byte_to_char[b3]
3511 end
3512
3513 local cache = 0
3514 local cache_bitlen = 0
3515 while i <= strlen do
3516 local x = string_byte(str, i, i)
3517 x = _6bit_to_byte[x]
3518 if not x then
3519 return nil
3520 end
3521 cache = cache + x * _pow2[cache_bitlen]
3522 cache_bitlen = cache_bitlen + 6
3523 i = i + 1
3524 end
3525
3526 while cache_bitlen >= 8 do
3527 local byte = cache % 256
3528 buffer_size = buffer_size + 1
3529 buffer[buffer_size] = _byte_to_char[byte]
3530 cache = (cache - byte) / 256
3531 cache_bitlen = cache_bitlen - 8
3532 end
3533
3534 return table_concat(buffer)
3535end
3536
3537local function InternalClearCache()
3538 _chat_channel_codec = nil
3539 _addon_channel_codec = nil
3540end
3541
3542-- For test. Don't use the functions in this table for real application.
3543-- Stuffs in this table is subject to change.
3544LibDeflate.internals = {
3545 LoadStringToTable = LoadStringToTable,
3546 IsValidDictionary = IsValidDictionary,
3547 IsEqualAdler32 = IsEqualAdler32,
3548 _byte_to_6bit_char = _byte_to_6bit_char,
3549 _6bit_to_byte = _6bit_to_byte,
3550 InternalClearCache = InternalClearCache,
3551}
3552
3553--[[-- Commandline options
3554@class table
3555@name CommandlineOptions
3556@usage lua LibDeflate.lua [OPTION] [INPUT] [OUTPUT]
3557\-0 store only. no compression.
3558\-1 fastest compression.
3559\-9 slowest and best compression.
3560\-d do decompression instead of compression.
3561\--dict <filename> specify the file that contains
3562the entire preset dictionary.
3563\--gzip use gzip format instead of raw deflate.
3564\-h give this help.
3565\--strategy <fixed/huffman_only/dynamic> specify a special compression strategy.
3566\-v print the version and copyright info.
3567\--zlib use zlib format instead of raw deflate.
3568]]
3569
3570-- support ComputerCraft shell
3571local arg = _G.arg
3572local debug = debug
3573if pcall(require, "LibDeflate") then
3574 arg = {...}
3575 arg[0] = "LibDeflate.lua"
3576 debug = {getinfo = function()
3577 return {source = "LibDeflate.lua", short_src = "LibDeflate.lua"}
3578 end}
3579 os.exit = function() error() end
3580 io.stderr = {write = function(self, text) printError(text) end}
3581end
3582
3583local function openFile(file, mode)
3584 if shell then
3585 local file = fs.open(file, mode)
3586 local retval = {close = file.close}
3587 if string.find(mode, "r") then retval.read = function()
3588 local retval = ""
3589 local b = file.read()
3590 while b ~= nil do
3591 retval = retval .. string.char(b)
3592 b = file.read()
3593 end
3594 file.close()
3595 return retval
3596 end end
3597 if string.find(mode, "w") then retval.write = function(this, str)
3598 if type(str) ~= "string" then error("Not a string: " .. textutils.serialize(str), 2) end
3599 for s in string.gmatch(str, ".") do file.write(string.byte(s)) end
3600 file.close()
3601 end end
3602 return retval
3603 else return io.open(file, mode) end
3604end
3605
3606-- currently no plan to support stdin and stdout.
3607-- Because Lua in Windows does not set stdout with binary mode.
3608if io and os and debug and arg then
3609 local io = io
3610 local os = os
3611 local debug_info = debug.getinfo(1)
3612 if debug_info.source == arg[0]
3613 or debug_info.short_src == arg[0] then
3614 -- We are indeed runnning THIS file from the commandline.
3615 local input
3616 local output
3617 local i = 1
3618 local status
3619 local compress_mode = 0
3620 local is_decompress = false
3621 local level
3622 local strategy
3623 local dictionary
3624 while (arg[i]) do
3625 local a = arg[i]
3626 if a == "-h" then
3627 print(LibDeflate._COPYRIGHT
3628 .."\nUsage: lua LibDeflate.lua [OPTION] [INPUT] [OUTPUT]\n"
3629 .." -0 store only. no compression.\n"
3630 .." -1 fastest compression.\n"
3631 .." -9 slowest and best compression.\n"
3632 .." -d do decompression instead of compression.\n"
3633 .." --dict <filename> specify the file that contains"
3634 .." the entire preset dictionary.\n"
3635 .." --gzip use gzip format instead of raw deflate.\n"
3636 .." -h give this help.\n"
3637 .." --strategy <fixed/huffman_only/dynamic>"
3638 .." specify a special compression strategy.\n"
3639 .." -v print the version and copyright info.\n"
3640 .." --zlib use zlib format instead of raw deflate.\n")
3641 os.exit(0)
3642 elseif a == "-v" then
3643 print(LibDeflate._COPYRIGHT)
3644 os.exit(0)
3645 elseif a:find("^%-[0-9]$") then
3646 level = tonumber(a:sub(2, 2))
3647 elseif a == "-d" then
3648 is_decompress = true
3649 elseif a == "--dict" then
3650 i = i + 1
3651 local dict_filename = arg[i]
3652 if not dict_filename then
3653 io.stderr:write("You must speicify the dict filename")
3654 os.exit(1)
3655 end
3656 local dict_file, dict_status = openFile(dict_filename, "rb")
3657 if not dict_file then
3658 io.stderr:write(
3659 ("LibDeflate: Cannot read the dictionary file '%s': %s")
3660 :format(dict_filename, dict_status))
3661 os.exit(1)
3662 end
3663 local dict_str = dict_file:read("*all")
3664 dict_file:close()
3665 -- In your lua program, you should pass in adler32 as a CONSTANT
3666 -- , so it actually prevent you from modifying dictionary
3667 -- unintentionally during the program development. I do this
3668 -- here just because no convenient way to verify in commandline.
3669 dictionary = LibDeflate:CreateDictionary(dict_str,
3670 #dict_str, LibDeflate:Adler32(dict_str))
3671 elseif a == "--gzip" then
3672 compress_mode = 2
3673 elseif a == "--strategy" then
3674 -- Not sure if I should check error here
3675 -- If I do, redudant code.
3676 i = i + 1
3677 strategy = arg[i]
3678 elseif a == "--zlib" then
3679 compress_mode = 1
3680 elseif a:find("^%-") then
3681 io.stderr:write(("LibDeflate: Invalid argument: %s")
3682 :format(a))
3683 os.exit(1)
3684 else
3685 if not input then
3686 input, status = openFile(a, "rb")
3687 if not input then
3688 io.stderr:write(
3689 ("LibDeflate: Cannot read the file '%s': %s")
3690 :format(a, tostring(status)))
3691 os.exit(1)
3692 end
3693 elseif not output then
3694 output, status = openFile(a, "wb")
3695 if not output then
3696 io.stderr:write(
3697 ("LibDeflate: Cannot write the file '%s': %s")
3698 :format(a, tostring(status)))
3699 os.exit(1)
3700 end
3701 end
3702 end
3703 i = i + 1
3704 end -- while (arg[i])
3705
3706 if not input or not output then
3707 io.stderr:write("LibDeflate:"
3708 .." You must specify both input and output files.")
3709 os.exit(1)
3710 end
3711
3712 local input_data = input:read("*all")
3713 local configs = {
3714 level = level,
3715 strategy = strategy,
3716 }
3717 local output_data
3718 if not is_decompress then
3719 if compress_mode == 0 then
3720 if not dictionary then
3721 output_data =
3722 LibDeflate:CompressDeflate(input_data, configs)
3723 else
3724 output_data =
3725 LibDeflate:CompressDeflateWithDict(input_data, dictionary
3726 , configs)
3727 end
3728 elseif compress_mode == 1 then
3729 if not dictionary then
3730 output_data =
3731 LibDeflate:CompressZlib(input_data, configs)
3732 else
3733 output_data =
3734 LibDeflate:CompressZlibWithDict(input_data, dictionary
3735 , configs)
3736 end
3737 elseif compress_mode == 2 then
3738 output_data = LibDeflate:CompressGzip(input_data, configs)
3739 end
3740 else
3741 if compress_mode == 0 then
3742 if not dictionary then
3743 output_data = LibDeflate:DecompressDeflate(input_data)
3744 else
3745 output_data = LibDeflate:DecompressDeflateWithDict(
3746 input_data, dictionary)
3747 end
3748 elseif compress_mode == 1 then
3749 if not dictionary then
3750 output_data = LibDeflate:DecompressZlib(input_data)
3751 else
3752 output_data = LibDeflate:DecompressZlibWithDict(
3753 input_data, dictionary)
3754 end
3755 elseif compress_mode == 2 then
3756 output_data = LibDeflate:DecompressGzip(input_data)
3757 end
3758 end
3759
3760 if not output_data then
3761 io.stderr:write("LibDeflate: Decompress fails.")
3762 os.exit(1)
3763 end
3764
3765 output:write(output_data)
3766 if input and input ~= io.stdin then
3767 input:close()
3768 end
3769 if output and output ~= io.stdout then
3770 output:close()
3771 end
3772
3773 io.stderr:write(("Successfully writes %d bytes"):format(
3774 output_data:len()))
3775 os.exit(0)
3776 end
3777end
3778
3779return LibDeflate