· 2 years ago · Oct 01, 2023, 01:15 AM
1
2
-- The package table. Every `function OBJDEF:...` definition in this file installs
-- a method on it, and the module's final `return` hands out an instance of it.
-- It has to exist before the first method definition is reached.
local OBJDEF = {}

-- Options used by encode_pretty() when the caller does not supply any.
local default_pretty_indent = " "
local default_pretty_options = { pretty = true, align_keys = false, indent = default_pretty_indent }

-- Marker metatables. A table tagged with one of these reports itself via tostring()
-- as "JSON array" / "JSON object", which is how the encoder tells an empty array
-- from an empty object.
local isArray = { __tostring = function() return "JSON array" end }
isArray.__index = isArray
local isObject = { __tostring = function() return "JSON object" end }
isObject.__index = isObject
8
9
--
-- Tag a table (or a fresh empty one when none is given) as a JSON array
-- by giving it the isArray metatable. Returns the tagged table.
--
function OBJDEF:newArray(tbl)
   local array = tbl
   if not array then
      array = {}
   end
   return setmetatable(array, isArray)
end
13
--
-- Tag a table (or a fresh empty one when none is given) as a JSON object
-- by giving it the isObject metatable. Returns the tagged table.
--
function OBJDEF:newObject(tbl)
   local object = tbl
   if not object then
      object = {}
   end
   return setmetatable(object, isObject)
end
17
--
-- Convert a Unicode codepoint (a number) to its UTF-8 byte sequence, returned as a string.
--
-- Codepoints in the UTF-16 surrogate range (U+D800..U+DFFF) have no valid UTF-8 form
-- and are returned as "?".
-- See table 3.7, page 93, in http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf#G28070
-- (thanks Andy R. at Adobe).
--
-- Bit-layout notation below is from http://en.wikipedia.org/wiki/Utf8
--
local function unicode_codepoint_as_utf8(codepoint)
   if codepoint <= 127 then
      -- 0xxxxxxx
      return string.char(codepoint)

   elseif codepoint <= 2047 then
      -- 110yyyxx 10xxxxxx
      local highpart = math.floor(codepoint / 0x40)
      local lowpart = codepoint - (0x40 * highpart)
      return string.char(0xC0 + highpart,
                         0x80 + lowpart)

   elseif codepoint <= 65535 then
      -- A lone surrogate is the only invalid value that can reach this branch:
      -- the lead byte here is always 0xE0..0xEF, and for lead byte 0xE0 the
      -- middle byte is always >= 0xA0 because codepoint is at least 0x800.
      if codepoint >= 0xD800 and codepoint <= 0xDFFF then
         return "?"
      end

      -- 1110yyyy 10yyyyxx 10xxxxxx
      local highpart = math.floor(codepoint / 0x1000)
      local remainder = codepoint - 0x1000 * highpart
      local midpart = math.floor(remainder / 0x40)
      local lowpart = remainder - 0x40 * midpart
      return string.char(0xE0 + highpart,
                         0x80 + midpart,
                         0x80 + lowpart)

   else
      -- 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
      local highpart = math.floor(codepoint / 0x40000)
      local remainder = codepoint - 0x40000 * highpart
      local midA = math.floor(remainder / 0x1000)
      remainder = remainder - 0x1000 * midA
      local midB = math.floor(remainder / 0x40)
      local lowpart = remainder - 0x40 * midB
      return string.char(0xF0 + highpart,
                         0x80 + midA,
                         0x80 + midB,
                         0x80 + lowpart)
   end
end
80
--
-- Default handler for decode failures.
--
--   message  -- description of the problem
--   text     -- the JSON text being decoded (optional; appended to the message)
--   location -- character offset of the problem within text (optional)
--   etc      -- caller-supplied context; when not nil its JSON encoding is appended
--
-- The finished message is raised through self.assert when that is set,
-- otherwise through the global assert.
--
function OBJDEF:onDecodeError(message, text, location, etc)
   if text and location then
      message = string.format("%s at char %d of: %s", message, location, text)
   elseif text then
      message = string.format("%s: %s", message, text)
   end

   if etc ~= nil then
      local encoded_context = OBJDEF:encode(etc)
      message = message .. " (" .. encoded_context .. ")"
   end

   local raise = self.assert or assert
   raise(false, message)
end
100
-- The nil-input and HTML-input decode hooks both start out as the general decode error handler.
OBJDEF.onDecodeOfNilError, OBJDEF.onDecodeOfHTMLError = OBJDEF.onDecodeError, OBJDEF.onDecodeError
103
--
-- Default handler for encode failures.
--
--   message -- description of the problem
--   etc     -- caller-supplied context; when not nil its JSON encoding is appended
--
-- The finished message is raised through self.assert when that is set,
-- otherwise through the global assert.
--
function OBJDEF:onEncodeError(message, etc)
   if etc ~= nil then
      local encoded_context = OBJDEF:encode(etc)
      message = message .. " (" .. encoded_context .. ")"
   end

   local raise = self.assert or assert
   raise(false, message)
end
115
--
-- Parse a JSON number that begins at position 'start' of 'text'.
--
-- Returns the number and the position just past it. A missing integer part is
-- reported through self:onDecodeError as "expected number"; text that tonumber()
-- cannot convert is reported as "bad number".
--
local function grok_number(self, text, start, etc)
   -- Mandatory integer part: either a non-zero-led run of digits, or a lone zero.
   local int_text = text:match('^-?[1-9]%d*', start) or text:match("^-?0", start)
   if not int_text then
      self:onDecodeError("expected number", text, start, etc)
   end

   local cursor = start + int_text:len()
   local pieces = { int_text }

   -- Optional parts, in order: the decimal fraction, then the exponent.
   for _, pattern in ipairs { '^%.%d+', '^[eE][-+]?%d+' } do
      local piece = text:match(pattern, cursor)
      if piece then
         pieces[#pieces + 1] = piece
         cursor = cursor + piece:len()
      end
   end

   local as_number = tonumber(table.concat(pieces))
   if not as_number then
      self:onDecodeError("bad number", text, start, etc)
   end

   return as_number, cursor
end
152
153
--
-- Parse a JSON string literal whose opening quote is at position 'start' of 'text'.
--
-- Returns the decoded string and the position just past the closing quote.
--
-- Escapes handled: \b \f \n \r \t, and \uXXXX (including UTF-16 surrogate pairs,
-- which are combined into a single codepoint before being turned into UTF-8).
-- Any other escaped character is passed through as itself.
--
-- Problems are reported through self:onDecodeError:
--   "expected string's opening quote" -- text at 'start' is not a double quote
--   "unclosed string"                 -- the text ends before the closing quote,
--                                        including when it ends on a lone backslash
--
local function grok_string(self, text, start, etc)

   if text:sub(start, start) ~= '"' then
      self:onDecodeError("expected string's opening quote", text, start, etc)
   end

   local simple_escapes = { b = "\b", f = "\f", n = "\n", r = "\r", t = "\t" }

   local i = start + 1 -- +1 to bypass the initial quote
   local text_len = text:len()

   -- Collect pieces and join once at the end, rather than rebuilding the
   -- result string for every character.
   local pieces, count = {}, 0

   while i <= text_len do
      local c = text:sub(i, i)

      if c == '"' then
         return table.concat(pieces), i + 1
      end

      if c ~= '\\' then
         -- Take the whole run of ordinary characters in one step.
         local run = text:match('^[^"\\]+', i)
         count = count + 1
         pieces[count] = run
         i = i + run:len()
      else
         local escaped = text:sub(i + 1, i + 1)
         local simple = simple_escapes[escaped]

         if escaped == "" then
            -- The backslash is the last character of the text: nothing is being
            -- escaped, so fall out to the "unclosed string" report below.
            break

         elseif simple then
            count = count + 1
            pieces[count] = simple
            i = i + 2

         else
            local hex = text:match('^\\u(%x%x%x%x)', i)
            if hex then
               i = i + 6 -- bypass what we just read

               -- We have a Unicode codepoint. It could be standalone, or if in the proper range and
               -- followed by another in a specific range, it'll be a two-code surrogate pair.
               local codepoint = tonumber(hex, 16)
               if codepoint >= 0xD800 and codepoint <= 0xDBFF then
                  -- it's a hi surrogate... see whether we have a following low
                  local lo_surrogate = text:match('^\\u([dD][cdefCDEF]%x%x)', i)
                  if lo_surrogate then
                     i = i + 6 -- bypass the low surrogate we just read
                     -- 0x2400 is 0x10000 - 0xDC00: the plane offset minus the low surrogate's base
                     codepoint = 0x2400 + (codepoint - 0xD800) * 0x400 + tonumber(lo_surrogate, 16)
                  end
                  -- otherwise: not a proper low, so the first codepoint is left as is
               end
               count = count + 1
               pieces[count] = unicode_codepoint_as_utf8(codepoint)

            else
               -- just pass through what's escaped
               count = count + 1
               pieces[count] = escaped
               i = i + 2
            end
         end
      end
   end

   self:onDecodeError("unclosed string", text, start, etc)
end
217
--
-- Return the position of the first character at or after 'start' that is not
-- JSON whitespace (space, newline, carriage return, tab).
-- [http://www.ietf.org/rfc/rfc4627.txt] Section 2
--
local function skip_whitespace(text, start)
   local last_blank = select(2, text:find("^[ \n\r\t]+", start))
   if not last_blank then
      return start
   end
   return last_blank + 1
end
227
local grok_one -- assigned later

--
-- Parse a JSON object whose '{' is at position 'start' of 'text'.
--
-- Returns a table mapping each member name to its decoded value, and the position
-- just past the closing '}'. When self.strictTypes is set, the table is tagged via
-- self:newObject. Members whose value is null are not stored, since assigning nil
-- leaves the key absent.
--
-- 'etc' is the caller's context and is passed on to every nested parse, so errors
-- raised inside member values carry it too.
--
-- Problems are reported through self:onDecodeError: "expected '{'", "expected colon",
-- "expected comma or '}'", "unclosed '{'".
--
local function grok_object(self, text, start, etc)
   if text:sub(start, start) ~= '{' then
      self:onDecodeError("expected '{'", text, start, etc)
   end

   local i = skip_whitespace(text, start + 1) -- +1 to skip the '{'

   local VALUE = self.strictTypes and self:newObject { } or { }

   if text:sub(i, i) == '}' then
      return VALUE, i + 1
   end

   local text_len = text:len()
   while i <= text_len do
      local key, after_key = grok_string(self, text, i, etc)

      i = skip_whitespace(text, after_key)

      if text:sub(i, i) ~= ':' then
         self:onDecodeError("expected colon", text, i, etc)
      end

      i = skip_whitespace(text, i + 1)

      local new_val, after_val = grok_one(self, text, i, etc)

      VALUE[key] = new_val

      --
      -- Expect now either '}' to end things, or a ',' to allow us to continue.
      --
      i = skip_whitespace(text, after_val)

      local c = text:sub(i, i)

      if c == '}' then
         return VALUE, i + 1
      end

      if c ~= ',' then
         self:onDecodeError("expected comma or '}'", text, i, etc)
      end

      i = skip_whitespace(text, i + 1)
   end

   self:onDecodeError("unclosed '{'", text, start, etc)
end
278
--
-- Parse a JSON array whose '[' is at position 'start' of 'text'.
--
-- Returns a table holding the decoded elements at indices 1..n, and the position
-- just past the closing ']'. When self.strictTypes is set, the table is tagged via
-- self:newArray. A null element leaves its index unset.
--
-- 'etc' is the caller's context and is passed on to every nested parse, so errors
-- raised inside elements carry it too.
--
-- Problems are reported through self:onDecodeError: "expected '['",
-- "expected comma or ']'", "unclosed '['".
--
local function grok_array(self, text, start, etc)
   if text:sub(start, start) ~= '[' then
      self:onDecodeError("expected '['", text, start, etc)
   end

   local i = skip_whitespace(text, start + 1) -- +1 to skip the '['
   local VALUE = self.strictTypes and self:newArray { } or { }
   if text:sub(i, i) == ']' then
      return VALUE, i + 1
   end

   local VALUE_INDEX = 1

   local text_len = text:len()
   while i <= text_len do
      local val, after_val = grok_one(self, text, i, etc)

      -- can't table.insert(VALUE, val) here because it's a no-op if val is nil
      VALUE[VALUE_INDEX] = val
      VALUE_INDEX = VALUE_INDEX + 1

      i = skip_whitespace(text, after_val)

      --
      -- Expect now either ']' to end things, or a ',' to allow us to continue.
      --
      local c = text:sub(i, i)
      if c == ']' then
         return VALUE, i + 1
      end
      if c ~= ',' then
         self:onDecodeError("expected comma or ']'", text, i, etc)
      end
      i = skip_whitespace(text, i + 1)
   end

   self:onDecodeError("unclosed '['", text, start, etc)
end
316
317
--
-- Parse a single JSON value of any kind starting at (or after whitespace following)
-- position 'start' of 'text', choosing the parser from how the value begins.
--
-- Returns the decoded value and the position just past it. A JSON null decodes to nil.
-- Problems are reported through self:onDecodeError: "unexpected end of string" when
-- only whitespace remains, "can't parse JSON" when nothing recognizable is found.
--
grok_one = function(self, text, start, etc)
   -- Skip any whitespace
   start = skip_whitespace(text, start)

   if start > text:len() then
      self:onDecodeError("unexpected end of string", text, nil, etc)
   end

   local lead = text:sub(start, start)

   if lead == '"' then
      return grok_string(self, text, start, etc)
   end

   if text:find('^[-0123456789 ]', start) then
      return grok_number(self, text, start, etc)
   end

   if lead == '{' then
      return grok_object(self, text, start, etc)
   end

   if lead == '[' then
      return grok_array(self, text, start, etc)
   end

   -- The three literal words.
   if text:sub(start, start + 3) == 'true' then
      return true, start + 4
   end

   if text:sub(start, start + 4) == 'false' then
      return false, start + 5
   end

   if text:sub(start, start + 3) == 'null' then
      return nil, start + 4
   end

   self:onDecodeError("can't parse JSON", text, start, etc)
end
351
--
-- Decode JSON text into a Lua value.
--
--   text -- the JSON text (a string)
--   etc  -- caller-supplied context, handed to the error handlers
--
-- Returns the decoded value. Text that is empty or all whitespace decodes to nil.
-- If parsing fails and the assert in use does not abort, returns nil plus the
-- error message.
--
-- Must be called in method format (JSON:decode(text)).
--
function OBJDEF:decode(text, etc)
   local called_as_method = type(self) == 'table' and self.__index == OBJDEF
   if not called_as_method then
      OBJDEF:onDecodeError("JSON:decode must be called in method format", nil, nil, etc)
   end

   local text_type = type(text)
   if text_type == 'nil' then
      self:onDecodeOfNilError("nil passed to JSON:decode()", nil, nil, etc)
   elseif text_type ~= 'string' then
      self:onDecodeError(string.format("expected string argument to JSON:decode(), got %s", text_type), nil, nil, etc)
   end

   -- Nothing but whitespace (or nothing at all): there is no value to decode.
   if not text:find('%S') then
      return nil
   end

   if text:find('^%s*<') then
      -- Can't be JSON... we'll assume it's HTML
      self:onDecodeOfHTMLError("html passed to JSON:decode()", text, nil, etc)
   end

   --
   -- Ensure that it's not UTF-32 or UTF-16.
   -- Those are perfectly valid encodings for JSON (as per RFC 4627 section 3),
   -- but this package can't handle them. A zero byte in either of the first two
   -- positions is taken as the sign of such an encoding.
   --
   if text:byte(1) == 0 or text:byte(2) == 0 then
      self:onDecodeError("JSON package groks only UTF-8, sorry", text, nil, etc)
   end

   local success, value = pcall(grok_one, self, text, 1, etc)
   if success then
      return value
   end

   -- If JSON:onDecodeError() didn't abort out of the pcall, the error message has
   -- arrived here as "value", so pass it along as an assert.
   local raise = self.assert or assert
   raise(false, value)

   -- and if we're still here, return a nil and throw the error message on as a second arg
   return nil, value
end
396
-- Characters that have a dedicated two-character JSON escape.
local short_escape_for = {
   ["\n"] = "\\n",
   ["\r"] = "\\r",
   ["\t"] = "\\t",
   ["\b"] = "\\b",
   ["\f"] = "\\f",
   ['"']  = '\\"',
   ['\\'] = '\\\\',
}

--
-- Return the JSON escape for the single character 'c': its dedicated short
-- escape when it has one, otherwise a \uXXXX escape built from its byte value.
--
local function backslash_replacement_function(c)
   return short_escape_for[c] or string.format("\\u%04x", c:byte())
end
416
-- Lua pattern (a character class) matching every character that must be escaped
-- inside a JSON string literal.
local chars_to_be_escaped_in_JSON_string = table.concat {
   '[',
   '"',          -- a double quote
   '%\\',        -- a backslash
   '%z',         -- a null
   '\001-\031',  -- control characters
   ']',
}
424
--
-- Return 'value' as a JSON string literal: special characters escaped and
-- the whole thing wrapped in double quotes.
--
local function json_string_literal(value)
   -- The parentheses keep only the rewritten string, dropping gsub's match count.
   local escaped = (value:gsub(chars_to_be_escaped_in_JSON_string, backslash_replacement_function))
   return table.concat { '"', escaped, '"' }
end
429
--
-- Decide whether table T should be encoded as a JSON array or a JSON object.
--
-- All the keys are inspected: if there are any strings, T becomes a JSON object.
-- If there are only positive integer keys, it becomes a JSON array. Number keys
-- that cannot be array indices (zero, negative, infinite, or with a fractional
-- part) force an object, with the numbers turned into strings.
--
-- Object keys are sorted so that the end result is deterministic.
--
-- Returns one of:
--   nil, maximum_number_key -- an array; elements run from 1 to maximum_number_key
--   nil                     -- an empty table to be written as an array
--   { }                     -- an empty table tagged as a "JSON object"
--   string_keys, nil, map   -- an object; string_keys is the sorted key list, and map
--                              (when not nil) is a copy of T in which number keys
--                              are also present as strings
--
-- Keys that are neither strings nor numbers, mixed keys when self.noKeyConversion
-- is set, and a number key that collides with an existing string key are all
-- reported through self:onEncodeError.
--
local function object_or_array(self, T, etc)
   local string_keys = { }
   local number_keys = { }
   local number_keys_must_be_strings = false
   local maximum_number_key

   for key in pairs(T) do
      local key_type = type(key)
      if key_type == 'string' then
         string_keys[#string_keys + 1] = key
      elseif key_type == 'number' then
         number_keys[#number_keys + 1] = key
         -- Only positive, finite, whole numbers can be array indices. A fractional
         -- key would otherwise be skipped by the 1..max element loop and its value lost.
         if key <= 0 or key >= math.huge or key ~= math.floor(key) then
            number_keys_must_be_strings = true
         elseif not maximum_number_key or key > maximum_number_key then
            maximum_number_key = key
         end
      else
         self:onEncodeError("can't encode table with a key of type " .. key_type, etc)
      end
   end

   if #string_keys == 0 and not number_keys_must_be_strings then
      --
      -- An empty table, or a numeric-only array
      --
      if #number_keys > 0 then
         return nil, maximum_number_key -- an array
      elseif tostring(T) == "JSON array" then
         return nil
      elseif tostring(T) == "JSON object" then
         return { }
      else
         -- have to guess, so we'll pick array, since empty arrays are likely more common than empty objects
         return nil
      end
   end

   table.sort(string_keys)

   local map
   if #number_keys > 0 then
      --
      -- If we're here then we have either mixed string/number keys, or numbers inappropriate for a JSON array
      -- It's not ideal, but we'll turn the numbers into strings so that we can at least create a JSON object.
      --

      if self.noKeyConversion then
         self:onEncodeError("a table with both numeric and string keys could be an object or array; aborting", etc)
      end

      --
      -- Have to make a shallow copy of the source table so we can remap the numeric keys to be strings
      --
      map = { }
      for key, val in pairs(T) do
         map[key] = val
      end

      table.sort(number_keys)

      --
      -- Throw numeric keys in there as strings
      --
      for _, number_key in ipairs(number_keys) do
         local string_key = tostring(number_key)
         if map[string_key] == nil then
            string_keys[#string_keys + 1] = string_key
            map[string_key] = T[number_key]
         else
            self:onEncodeError("conflict converting table with mixed-type keys into a JSON object: key " .. number_key .. " exists both as a string and a number.", etc)
         end
      end
   end

   return string_keys, nil, map
end
513
--
-- Encode
--
-- Turn one Lua value into JSON text, recursing into tables.
--
--   value   -- the value to encode (nil, string, number, boolean, or table)
--   parents -- set of tables currently being encoded further up the recursion;
--              used to detect a table that contains itself
--   etc     -- caller-supplied context, handed to self:onEncodeError
--   indent  -- the indentation already in effect for this nesting level
--
-- 'options' is nil, or a table with possible keys:
--    pretty     -- if true, return a pretty-printed version
--    indent     -- a string (usually of spaces) used to indent each nested level
--    align_keys -- if true, align all the keys when formatting a table
--
-- Returns the JSON text. Values of any other type, and tables that contain
-- themselves, are reported through self:onEncodeError.
--
local function encode_value(self, value, parents, etc, options, indent)

   if value == nil then
      return 'null'
   end

   local value_type = type(value)

   if value_type == 'string' then
      return json_string_literal(value)
   end

   if value_type == 'number' then
      if value ~= value then
         --
         -- NaN (Not a Number).
         -- JSON has no NaN, so we have to fudge the best we can. This should really be a package option.
         --
         return "null"
      elseif value >= math.huge then
         --
         -- Positive infinity. JSON has no INF, so we have to fudge the best we can. This should
         -- really be a package option. Note: at least with some implementations, positive infinity
         -- is both ">= math.huge" and "<= -math.huge", which makes no sense but that's how it is.
         -- Negative infinity is properly "<= -math.huge". So, we must be sure to check the ">="
         -- case first.
         --
         return "1e+9999"
      elseif value <= -math.huge then
         --
         -- Negative infinity.
         -- JSON has no INF, so we have to fudge the best we can. This should really be a package option.
         --
         return "-1e+9999"
      else
         return tostring(value)
      end
   end

   if value_type == 'boolean' then
      return tostring(value)
   end

   if value_type ~= 'table' then
      self:onEncodeError("can't convert " .. value_type .. " to JSON", etc)
      return
   end

   --
   -- A table to be converted to either a JSON object or array.
   --
   local T = value

   if type(options) ~= 'table' then
      options = {}
   end
   if type(indent) ~= 'string' then
      indent = ""
   end

   if parents[T] then
      self:onEncodeError("table " .. tostring(T) .. " is a child of itself", etc)
   else
      parents[T] = true
   end

   local result_value

   local object_keys, maximum_number_key, map = object_or_array(self, T, etc)
   if maximum_number_key then
      --
      -- An array...
      --
      local ITEMS = { }
      for i = 1, maximum_number_key do
         ITEMS[i] = encode_value(self, T[i], parents, etc, options, indent)
      end

      if options.pretty then
         result_value = "[ " .. table.concat(ITEMS, ", ") .. " ]"
      else
         result_value = "[" .. table.concat(ITEMS, ",") .. "]"
      end

   elseif object_keys then
      --
      -- An object
      --
      local TT = map or T

      if options.pretty then

         local KEYS = { }
         local max_key_length = 0
         for i, key in ipairs(object_keys) do
            local encoded = encode_value(self, tostring(key), parents, etc, options, indent)
            if options.align_keys then
               max_key_length = math.max(max_key_length, #encoded)
            end
            KEYS[i] = encoded
         end
         local key_indent = indent .. tostring(options.indent or "")
         local subtable_indent = key_indent .. string.rep(" ", max_key_length) .. (options.align_keys and " " or "")

         local COMBINED_PARTS = { }
         for i, key in ipairs(object_keys) do
            local encoded_val = encode_value(self, TT[key], parents, etc, options, subtable_indent)
            -- Right-align the key by hand. A "%<width>s" format cannot be used here:
            -- a width of 0 (keys not aligned) gives "%0s", which is not a valid
            -- conversion, and string.format accepts at most two width digits.
            -- string.rep returns "" when the count is zero or negative.
            local padding = string.rep(" ", max_key_length - #KEYS[i])
            COMBINED_PARTS[i] = key_indent .. padding .. KEYS[i] .. ": " .. encoded_val
         end
         result_value = "{\n" .. table.concat(COMBINED_PARTS, ",\n") .. "\n" .. indent .. "}"

      else

         local PARTS = { }
         for i, key in ipairs(object_keys) do
            local encoded_val = encode_value(self, TT[key], parents, etc, options, indent)
            local encoded_key = encode_value(self, tostring(key), parents, etc, options, indent)
            PARTS[i] = encoded_key .. ":" .. encoded_val
         end
         result_value = "{" .. table.concat(PARTS, ",") .. "}"

      end
   else
      --
      -- An empty array/object... we'll treat it as an array, though it should really be an option
      --
      result_value = "[]"
   end

   -- Done with this table: it may legitimately appear again elsewhere in the structure.
   parents[T] = false
   return result_value
end
650
651
--
-- Encode a Lua value as compact JSON text.
--
--   value   -- the value to encode
--   etc     -- caller-supplied context, handed to the error handler
--   options -- optional table of encoding options (pretty, indent, align_keys)
--
-- Must be called in method format (JSON:encode(value)).
--
function OBJDEF:encode(value, etc, options)
   local called_as_method = type(self) == 'table' and self.__index == OBJDEF
   if not called_as_method then
      OBJDEF:onEncodeError("JSON:encode must be called in method format", etc)
   end

   local tables_in_progress = {}
   return encode_value(self, value, tables_in_progress, etc, options or nil)
end
658
--
-- Encode a Lua value as pretty-printed JSON text.
--
--   value   -- the value to encode
--   etc     -- caller-supplied context, handed to the error handler
--   options -- optional table of encoding options; when omitted,
--              default_pretty_options is used
--
-- Must be called in method format (JSON:encode_pretty(value)).
--
function OBJDEF:encode_pretty(value, etc, options)
   local called_as_method = type(self) == 'table' and self.__index == OBJDEF
   if not called_as_method then
      OBJDEF:onEncodeError("JSON:encode_pretty must be called in method format", etc)
   end

   local tables_in_progress = {}
   return encode_value(self, value, tables_in_progress, etc, options or default_pretty_options)
end
665
-- tostring() of any table whose metatable is OBJDEF yields the package's description.
OBJDEF.__tostring = function()
   return "JSON encode/decode package"
end

-- Tables whose metatable is OBJDEF look up missing fields (the methods) on OBJDEF itself.
OBJDEF.__index = OBJDEF
671
--
-- Create a new package instance.
--
--   args -- optional table; each of its key/value pairs is copied onto the new instance
--
-- Returns the new instance, whose metatable is OBJDEF.
--
function OBJDEF:new(args)
   local instance = { }

   for key, val in pairs(args or { }) do
      instance[key] = val
   end

   return setmetatable(instance, OBJDEF)
end
683
-- The value of this file as a chunk: a fresh instance created with no arguments.
return OBJDEF:new()