· 6 years ago · Mar 12, 2020, 07:02 PM
1/*
2 * Copyright (c) 2012-2017 Chad Austin
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25#pragma once
26
27#include <assert.h>
28#include <stdint.h>
29#include <stddef.h>
30#include <string.h>
31#include <math.h>
32#include <limits.h>
33#include <algorithm>
34#include <cstdio>
35#include <limits>
36
37#ifndef SAJSON_NO_STD_STRING
38#include <string> // for convenient access to error messages and string values.
39#endif
40
41#if defined(__GNUC__) || defined(__clang__)
42#define SAJSON_LIKELY(x) __builtin_expect(!!(x), 1)
43#define SAJSON_UNLIKELY(x) __builtin_expect(!!(x), 0)
44#define SAJSON_ALWAYS_INLINE __attribute__((always_inline))
45#define SAJSON_UNREACHABLE() __builtin_unreachable()
46#define SAJSON_snprintf snprintf
47#elif defined(_MSC_VER)
48#define SAJSON_LIKELY(x) x
49#define SAJSON_UNLIKELY(x) x
50#define SAJSON_ALWAYS_INLINE __forceinline
51#define SAJSON_UNREACHABLE() __assume(0)
52#if (_MSC_VER <= 1800)
53#define SAJSON_snprintf _snprintf
54#else
55#define SAJSON_snprintf snprintf
56#endif
57#else
58#define SAJSON_LIKELY(x) x
59#define SAJSON_UNLIKELY(x) x
60#define SAJSON_ALWAYS_INLINE inline
61#define SAJSON_UNREACHABLE() assert(!"unreachable")
62#define SAJSON_snprintf snprintf
63#endif
64
65
66
67#pragma GCC diagnostic push
68#pragma GCC diagnostic ignored "-Wunused"
69#pragma GCC diagnostic ignored "-Wunused-parameter"
70
71
72/**
73 * sajson Public API
74 */
75namespace sajson {
76
77 /// Tag indicating a JSON value's type.
78 enum type: uint8_t {
79 TYPE_INTEGER = 0,
80 TYPE_DOUBLE = 1,
81 TYPE_NULL = 2,
82 TYPE_FALSE = 3,
83 TYPE_TRUE = 4,
84 TYPE_STRING = 5,
85 TYPE_ARRAY = 6,
86 TYPE_OBJECT = 7,
87 };
88
89 namespace internal {
90 static const size_t TYPE_BITS = 3;
91 static const size_t TYPE_MASK = (1 << TYPE_BITS) - 1;
92 static const size_t VALUE_MASK = size_t(-1) >> TYPE_BITS;
93
94 static const size_t ROOT_MARKER = VALUE_MASK;
95
96 inline type get_element_type(size_t s) {
97 return static_cast<type>(s & TYPE_MASK);
98 }
99
100 inline size_t get_element_value(size_t s) {
101 return s >> TYPE_BITS;
102 }
103
104 inline size_t make_element(type t, size_t value) {
105 //assert((value & ~VALUE_MASK) == 0);
106 //value &= VALUE_MASK;
107 return static_cast<size_t>(t) | (value << TYPE_BITS);
108 }
109
110 // This template utilizes the One Definition Rule to create global arrays in a header.
111 // This trick courtesy of Rich Geldreich's Purple JSON parser.
112 template<typename unused=void>
113 struct globals_struct {
114 static const unsigned char parse_flags[256];
115 };
116 typedef globals_struct<> globals;
117
118 // bit 0 (1) - set if: plain ASCII string character
119 // bit 1 (2) - set if: whitespace
120 // bit 4 (0x10) - set if: 0-9 e E .
121 template<typename unused>
122 const uint8_t globals_struct<unused>::parse_flags[256] = {
123 // 0 1 2 3 4 5 6 7 8 9 A B C D E F
124 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 2, 0, 0, // 0
125 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1
126 3, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0x11,1, // 2
127 0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11, 0x11,0x11,1, 1, 1, 1, 1, 1, // 3
128 1, 1, 1, 1, 1, 0x11,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, // 5
130 1, 1, 1, 1, 1, 0x11,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6
131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7
132
133 // 128-255
134 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
135 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
136 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
137 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0
138 };
139
140 inline bool is_plain_string_character(char c) {
141 //return c >= 0x20 && c <= 0x7f && c != 0x22 && c != 0x5c;
142 return (globals::parse_flags[static_cast<unsigned char>(c)] & 1) != 0;
143 }
144
145 inline bool is_whitespace(char c) {
146 //return c == '\r' || c == '\n' || c == '\t' || c == ' ';
147 return (globals::parse_flags[static_cast<unsigned char>(c)] & 2) != 0;
148 }
149
150 class allocated_buffer {
151 public:
152 allocated_buffer()
153 : memory(0)
154 {}
155
156 explicit allocated_buffer(size_t length) {
157 // throws std::bad_alloc upon allocation failure
158 void* buffer = operator new(sizeof(size_t) + length);
159 memory = static_cast<layout*>(buffer);
160 memory->refcount = 1;
161 }
162
163 allocated_buffer(const allocated_buffer& that)
164 : memory(that.memory)
165 {
166 incref();
167 }
168
169 allocated_buffer(allocated_buffer&& that)
170 : memory(that.memory)
171 {
172 that.memory = 0;
173 }
174
175 ~allocated_buffer() {
176 decref();
177 }
178
179 allocated_buffer& operator=(const allocated_buffer& that) {
180 if (this != &that) {
181 decref();
182 memory = that.memory;
183 incref();
184 }
185 return *this;
186 }
187
188 allocated_buffer& operator=(allocated_buffer&& that) {
189 if (this != &that) {
190 decref();
191 memory = that.memory;
192 that.memory = 0;
193 }
194 return *this;
195 }
196
197 char* get_data() const {
198 return memory ? memory->data : 0;
199 }
200
201 private:
202 void incref() const {
203 if (memory) {
204 ++(memory->refcount);
205 }
206 }
207
208 void decref() const {
209 if (memory && --(memory->refcount) == 0) {
210 operator delete(memory);
211 }
212 }
213
214 struct layout {
215 size_t refcount;
216 char data[];
217 };
218
219 layout* memory;
220 };
221 }
222
223 /// A simple type encoding a pointer to some memory and a length (in bytes).
224 /// Does not maintain any memory.
225 class string {
226 public:
227 string(const char* text_, size_t length)
228 : text(text_)
229 , _length(length)
230 {}
231
232 const char* data() const {
233 return text;
234 }
235
236 size_t length() const {
237 return _length;
238 }
239
240#ifndef SAJSON_NO_STD_STRING
241 std::string as_string() const {
242 return std::string(text, text + _length);
243 }
244#endif
245
246 private:
247 const char* const text;
248 const size_t _length;
249
250 string(); /*=delete*/
251 };
252
253 /// A convenient way to parse JSON from a string literal. The string ends
254 /// at its first NUL character.
255 class literal : public string {
256 public:
257 template <size_t sz>
258 explicit literal(const char (&text_)[sz])
259 : string(text_, sz - 1)
260 {
261 static_assert(sz > 0, "!");
262 }
263 };
264
265 /// A pointer to a mutable buffer, its size in bytes, and strong ownership of any
266 /// copied memory.
267 class mutable_string_view {
268 public:
269 /// Creates an empty, zero-sized view.
270 mutable_string_view()
271 : length_(0)
272 , data(0)
273 , buffer()
274 {}
275
276 /// Given a length in bytes and a pointer, constructs a view
277 /// that does not allocate a copy of the data or maintain its life.
278 /// The given pointer must stay valid for the duration of the parse and the
279 /// resulting \ref document's life.
280 mutable_string_view(size_t length, char* data_)
281 : length_(length)
282 , data(data_)
283 , buffer()
284 {}
285
286 /// Allocates a copy of the given \ref literal string and exposes a
287 /// mutable view into it. Throws std::bad_alloc if allocation fails.
288 mutable_string_view(const literal& s)
289 : length_(s.length())
290 , buffer(length_)
291 {
292 data = buffer.get_data();
293 memcpy(data, s.data(), length_);
294 }
295
296 /// Allocates a copy of the given \ref string and exposes a mutable view
297 /// into it. Throws std::bad_alloc if allocation fails.
298 mutable_string_view(const string& s)
299 : length_(s.length())
300 , buffer(length_)
301 {
302 data = buffer.get_data();
303 memcpy(data, s.data(), length_);
304 }
305
306 /// Copies a mutable_string_view. If any backing memory has been
307 /// allocated, its refcount is incremented - both views can safely
308 /// use the memory.
309 mutable_string_view(const mutable_string_view& that)
310 : length_(that.length_)
311 , data(that.data)
312 , buffer(that.buffer)
313 {}
314
315 /// Move constructor - neuters the old mutable_string_view.
316 mutable_string_view(mutable_string_view&& that)
317 : length_(that.length_)
318 , data(that.data)
319 , buffer(std::move(that.buffer))
320 {
321 that.length_ = 0;
322 that.data = 0;
323 }
324
325 mutable_string_view& operator=(mutable_string_view&& that) {
326 if (this != &that) {
327 length_ = that.length_;
328 data = that.data;
329 buffer = std::move(that.buffer);
330 that.length_ = 0;
331 that.data = 0;
332 }
333 return *this;
334 }
335
336 mutable_string_view& operator=(const mutable_string_view& that) {
337 if (this != &that) {
338 length_ = that.length_;
339 data = that.data;
340 buffer = that.buffer;
341 }
342 return *this;
343 }
344
345 size_t length() const {
346 return length_;
347 }
348
349 char* get_data() const {
350 return data;
351 }
352
353 private:
354 size_t length_;
355 char* data;
356 internal::allocated_buffer buffer; // may not be allocated
357 };
358
359 namespace internal {
360 struct object_key_record {
361 size_t key_start;
362 size_t key_end;
363 size_t value;
364 };
365
366 struct object_key_comparator {
367 object_key_comparator(const char* object_data)
368 : data(object_data)
369 {}
370
371 bool operator()(const object_key_record& lhs, const string& rhs) const {
372 const size_t lhs_length = lhs.key_end - lhs.key_start;
373 const size_t rhs_length = rhs.length();
374 if (lhs_length < rhs_length) {
375 return true;
376 } else if (lhs_length > rhs_length) {
377 return false;
378 }
379 return memcmp(data + lhs.key_start, rhs.data(), lhs_length) < 0;
380 }
381
382 bool operator()(const string& lhs, const object_key_record& rhs) const {
383 return !(*this)(rhs, lhs);
384 }
385
386 bool operator()(
387 const object_key_record& lhs,
388 const object_key_record& rhs
389 ) {
390 const size_t lhs_length = lhs.key_end - lhs.key_start;
391 const size_t rhs_length = rhs.key_end - rhs.key_start;
392 if (lhs_length < rhs_length) {
393 return true;
394 } else if (lhs_length > rhs_length) {
395 return false;
396 }
397 return memcmp(
398 data + lhs.key_start,
399 data + rhs.key_start,
400 lhs_length
401 ) < 0;
402 }
403
404 const char* data;
405 };
406 }
407
408 namespace integer_storage {
409 enum {
410 word_length = 1
411 };
412
413 inline int load(const size_t* location) {
414 int value;
415 memcpy(&value, location, sizeof(value));
416 return value;
417 }
418
419 inline void store(size_t* location, int value) {
420 // NOTE: Most modern compilers optimize away this constant-size
421 // memcpy into a single instruction. If any don't, and treat
422 // punning through a union as legal, they can be special-cased.
423 static_assert(
424 sizeof(value) <= sizeof(*location),
425 "size_t must not be smaller than int");
426 memcpy(location, &value, sizeof(value));
427 }
428 }
429
430 namespace double_storage {
431 enum {
432 word_length = sizeof(double) / sizeof(size_t)
433 };
434
435 inline double load(const size_t* location) {
436 double value;
437 memcpy(&value, location, sizeof(double));
438 return value;
439 }
440
441 inline void store(size_t* location, double value) {
442 // NOTE: Most modern compilers optimize away this constant-size
443 // memcpy into a single instruction. If any don't, and treat
444 // punning through a union as legal, they can be special-cased.
445 memcpy(location, &value, sizeof(double));
446 }
447 }
448
449 /// Represents a JSON value. First, call get_type() to check its type,
450 /// which determines which methods are available.
451 ///
452 /// Note that \ref value does not maintain any backing memory, only the
453 /// corresponding \ref document does. It is illegal to access a \ref value
454 /// after its \ref document has been destroyed.
455 class value {
456 public:
457 /// Returns the JSON value's \ref type.
458 type get_type() const {
459 return value_type;
460 }
461
462 /// Returns the length of the object or array.
463 /// Only legal if get_type() is TYPE_ARRAY or TYPE_OBJECT.
464 size_t get_length() const {
465 assert_type_2(TYPE_ARRAY, TYPE_OBJECT);
466 return payload[0];
467 }
468
469 /// Returns the nth element of an array. Calling with an out-of-bound
470 /// index is undefined behavior.
471 /// Only legal if get_type() is TYPE_ARRAY.
472 value get_array_element(size_t index) const {
473 using namespace internal;
474 assert_type(TYPE_ARRAY);
475 size_t element = payload[1 + index];
476 return value(get_element_type(element), payload + get_element_value(element), text);
477 }
478
479 /// Returns the nth key of an object. Calling with an out-of-bound
480 /// index is undefined behavior.
481 /// Only legal if get_type() is TYPE_OBJECT.
482 string get_object_key(size_t index) const {
483 assert_type(TYPE_OBJECT);
484 const size_t* s = payload + 1 + index * 3;
485 return string(text + s[0], s[1] - s[0]);
486 }
487
488 /// Returns the nth value of an object. Calling with an out-of-bound
489 /// index is undefined behavior. Only legal if get_type() is TYPE_OBJECT.
490 value get_object_value(size_t index) const {
491 using namespace internal;
492 assert_type(TYPE_OBJECT);
493 size_t element = payload[3 + index * 3];
494 return value(get_element_type(element), payload + get_element_value(element), text);
495 }
496
497 /// Given a string key, returns the value with that key or a null value
498 /// if the key is not found. Running time is O(lg N).
499 /// Only legal if get_type() is TYPE_OBJECT.
500 value get_value_of_key(const string& key) const {
501 assert_type(TYPE_OBJECT);
502 size_t i = find_object_key(key);
503 if (i < get_length()) {
504 return get_object_value(i);
505 } else {
506 return value(TYPE_NULL, 0, 0);
507 }
508 }
509
510 /// Given a string key, returns the index of the associated value if
511 /// one exists. Returns get_length() if there is no such key.
512 /// Note: sajson sorts object keys, so the running time is O(lg N).
513 /// Only legal if get_type() is TYPE_OBJECT
514 size_t find_object_key(const string& key) const {
515 using namespace internal;
516 assert_type(TYPE_OBJECT);
517 const object_key_record* start = reinterpret_cast<const object_key_record*>(payload + 1);
518 const object_key_record* end = start + get_length();
519#ifdef SAJSON_UNSORTED_OBJECT_KEYS
520 for (const object_key_record* i = start; i != end; ++i)
521#else
522 const object_key_record* i = std::lower_bound(start, end, key, object_key_comparator(text));
523#endif
524 if (i != end
525 && (i->key_end - i->key_start) == key.length()
526 && memcmp(key.data(), text + i->key_start, key.length()) == 0) {
527 return i - start;
528 }
529 return get_length();
530 }
531
532 /// If a numeric value was parsed as a 32-bit integer, returns it.
533 /// Only legal if get_type() is TYPE_INTEGER.
534 int get_integer_value() const {
535 assert_type(TYPE_INTEGER);
536 return integer_storage::load(payload);
537 }
538
539 /// If a numeric value was parsed as a double, returns it.
540 /// Only legal if get_type() is TYPE_DOUBLE.
541 double get_double_value() const {
542 assert_type(TYPE_DOUBLE);
543 return double_storage::load(payload);
544 }
545
546 /// Returns a numeric value as a double-precision float.
547 /// Only legal if get_type() is TYPE_INTEGER or TYPE_DOUBLE.
548 double get_number_value() const {
549 assert_type_2(TYPE_INTEGER, TYPE_DOUBLE);
550 if (get_type() == TYPE_INTEGER) {
551 return get_integer_value();
552 } else {
553 return get_double_value();
554 }
555 }
556
557 /// Returns true and writes to the output argument if the numeric value
558 /// fits in a 53-bit integer. This is useful for timestamps and other
559 /// situations where integral values with greater than 32-bit precision
560 /// are used, as 64-bit values are not understood by all JSON
561 /// implementations or languages.
562 /// Returns false if the value is not an integer or not in range.
563 /// Only legal if get_type() is TYPE_INTEGER or TYPE_DOUBLE.
564 bool get_int53_value(int64_t* out) const {
565 // Make sure the output variable is always defined to avoid any
566 // possible situation like
567 // https://gist.github.com/chadaustin/2c249cb850619ddec05b23ca42cf7a18
568 *out = 0;
569
570 assert_type_2(TYPE_INTEGER, TYPE_DOUBLE);
571 if (get_type() == TYPE_INTEGER) {
572 *out = get_integer_value();
573 return true;
574 } else if (get_type() == TYPE_DOUBLE) {
575 double v = get_double_value();
576 if (v < -(1LL << 53) || v > (1LL << 53)) {
577 return false;
578 }
579 int64_t as_int = static_cast<int64_t>(v);
580 if (as_int != v) {
581 return false;
582 }
583 *out = as_int;
584 return true;
585 } else {
586 return false;
587 }
588 }
589
590 /// Returns the length of the string.
591 /// Only legal if get_type() is TYPE_STRING.
592 size_t get_string_length() const {
593 assert_type(TYPE_STRING);
594 return payload[1] - payload[0];
595 }
596
597 /// Returns a pointer to the beginning of a string value's data.
598 /// WARNING: Calling this function and using the return value as a
599 /// C-style string (that is, without also using get_string_length())
600 /// will cause the string to appear truncated if the string has
601 /// embedded NULs.
602 /// Only legal if get_type() is TYPE_STRING.
603 const char* as_cstring() const {
604 assert_type(TYPE_STRING);
605 return text + payload[0];
606 }
607
608#ifndef SAJSON_NO_STD_STRING
609 /// Returns a string's value as a std::string.
610 /// Only legal if get_type() is TYPE_STRING.
611 std::string as_string() const {
612 assert_type(TYPE_STRING);
613 return std::string(text + payload[0], text + payload[1]);
614 }
615#endif
616
617 /// \cond INTERNAL
618 const size_t* _internal_get_payload() const {
619 return payload;
620 }
621 /// \endcond
622
623 private:
624 explicit value(type value_type_, const size_t* payload_, const char* text_)
625 : value_type(value_type_)
626 , payload(payload_)
627 , text(text_)
628 {}
629
630 void assert_type(type expected) const {
631 assert(expected == get_type());
632 }
633
634 void assert_type_2(type e1, type e2) const {
635 assert(e1 == get_type() || e2 == get_type());
636 }
637
638 void assert_in_bounds(size_t i) const {
639 assert(i < get_length());
640 }
641
642 const type value_type;
643 const size_t* const payload;
644 const char* const text;
645
646 friend class document;
647 };
648
649 /// Error code indicating why parse failed.
650 enum error {
651 ERROR_NO_ERROR,
652 ERROR_OUT_OF_MEMORY,
653 ERROR_UNEXPECTED_END,
654 ERROR_MISSING_ROOT_ELEMENT,
655 ERROR_BAD_ROOT,
656 ERROR_EXPECTED_COMMA,
657 ERROR_MISSING_OBJECT_KEY,
658 ERROR_EXPECTED_COLON,
659 ERROR_EXPECTED_END_OF_INPUT,
660 ERROR_UNEXPECTED_COMMA,
661 ERROR_EXPECTED_VALUE,
662 ERROR_EXPECTED_NULL,
663 ERROR_EXPECTED_FALSE,
664 ERROR_EXPECTED_TRUE,
665 ERROR_INVALID_NUMBER,
666 ERROR_MISSING_EXPONENT,
667 ERROR_ILLEGAL_CODEPOINT,
668 ERROR_INVALID_UNICODE_ESCAPE,
669 ERROR_UNEXPECTED_END_OF_UTF16,
670 ERROR_EXPECTED_U,
671 ERROR_INVALID_UTF16_TRAIL_SURROGATE,
672 ERROR_UNKNOWN_ESCAPE,
673 ERROR_INVALID_UTF8,
674 };
675
676 namespace internal {
677 class ownership {
678 public:
679 ownership() = delete;
680 ownership(const ownership&) = delete;
681 void operator=(const ownership&) = delete;
682
683 explicit ownership(size_t* p_)
684 : p(p_)
685 {}
686
687 ownership(ownership&& p_)
688 : p(p_.p) {
689 p_.p = 0;
690 }
691
692 ~ownership() {
693 delete[] p;
694 }
695
696 bool is_valid() const {
697 return !!p;
698 }
699
700 private:
701 size_t* p;
702 };
703
704 inline const char* get_error_text(error error_code) {
705 switch (error_code) {
706 case ERROR_NO_ERROR: return "no error";
707 case ERROR_OUT_OF_MEMORY: return "out of memory";
708 case ERROR_UNEXPECTED_END: return "unexpected end of input";
709 case ERROR_MISSING_ROOT_ELEMENT: return "missing root element";
710 case ERROR_BAD_ROOT: return "document root must be object or array";
711 case ERROR_EXPECTED_COMMA: return "expected ,";
712 case ERROR_MISSING_OBJECT_KEY: return "missing object key";
713 case ERROR_EXPECTED_COLON: return "expected :";
714 case ERROR_EXPECTED_END_OF_INPUT: return "expected end of input";
715 case ERROR_UNEXPECTED_COMMA: return "unexpected comma";
716 case ERROR_EXPECTED_VALUE: return "expected value";
717 case ERROR_EXPECTED_NULL: return "expected 'null'";
718 case ERROR_EXPECTED_FALSE: return "expected 'false'";
719 case ERROR_EXPECTED_TRUE: return "expected 'true'";
720 case ERROR_INVALID_NUMBER: return "invalid number";
721 case ERROR_MISSING_EXPONENT: return "missing exponent";
722 case ERROR_ILLEGAL_CODEPOINT: return "illegal unprintable codepoint in string";
723 case ERROR_INVALID_UNICODE_ESCAPE: return "invalid character in unicode escape";
724 case ERROR_UNEXPECTED_END_OF_UTF16: return "unexpected end of input during UTF-16 surrogate pair";
725 case ERROR_EXPECTED_U: return "expected \\u";
726 case ERROR_INVALID_UTF16_TRAIL_SURROGATE: return "invalid UTF-16 trail surrogate";
727 case ERROR_UNKNOWN_ESCAPE: return "unknown escape";
728 case ERROR_INVALID_UTF8: return "invalid UTF-8";
729 }
730
731 SAJSON_UNREACHABLE();
732 }
733 }
734
735 /**
736 * Represents the result of a JSON parse: either is_valid() and the document
737 * contains a root value or parse error information is available.
738 *
739 * Note that the document holds a strong reference to any memory allocated:
740 * any mutable copy of the input text and any memory allocated for the
741 * AST data structure. Thus, the document must not be deallocated while any
742 * \ref value is in use.
743 */
744 class document {
745 public:
746 document(document&& rhs)
747 : input(rhs.input)
748 , structure(std::move(rhs.structure))
749 , root_type(rhs.root_type)
750 , root(rhs.root)
751 , error_line(rhs.error_line)
752 , error_column(rhs.error_column)
753 , error_code(rhs.error_code)
754 , error_arg(rhs.error_arg)
755 {
756 // Yikes... but strcpy is okay here because formatted_error is
757 // guaranteed to be null-terminated.
758 strcpy(formatted_error_message, rhs.formatted_error_message);
759 // should rhs's fields be zeroed too?
760 }
761
762 /**
763 * Returns true if the document was parsed successfully.
764 * If true, call get_root() to access the document's root value.
765 * If false, call get_error_line(), get_error_column(), and
766 * get_error_message_as_cstring() to see why the parse failed.
767 */
768 bool is_valid() const {
769 return root_type == TYPE_ARRAY || root_type == TYPE_OBJECT;
770 }
771
772 /// If is_valid(), returns the document's root \ref value.
773 value get_root() const {
774 return value(root_type, root, input.get_data());
775 }
776
777 /// If not is_valid(), returns the one-based line number where the parse failed.
778 size_t get_error_line() const {
779 return error_line;
780 }
781
782 /// If not is_valid(), returns the one-based column number where the parse failed.
783 size_t get_error_column() const {
784 return error_column;
785 }
786
787#ifndef SAJSON_NO_STD_STRING
788 /// If not is_valid(), returns a std::string indicating why the parse failed.
789 std::string get_error_message_as_string() const {
790 return formatted_error_message;
791 }
792#endif
793
794 /// If not is_valid(), returns a null-terminated C string indicating why the parse failed.
795 const char* get_error_message_as_cstring() const {
796 return formatted_error_message;
797 }
798
799 /// \cond INTERNAL
800
801 // WARNING: Internal function which is subject to change
802 error _internal_get_error_code() const {
803 return error_code;
804 }
805
806 // WARNING: Internal function which is subject to change
807 int _internal_get_error_argument() const {
808 return error_arg;
809 }
810
811 // WARNING: Internal function which is subject to change
812 const char* _internal_get_error_text() const {
813 return internal::get_error_text(error_code);
814 }
815
816 // WARNING: Internal function exposed only for high-performance language bindings.
817 type _internal_get_root_type() const {
818 return root_type;
819 }
820
821 // WARNING: Internal function exposed only for high-performance language bindings.
822 const size_t* _internal_get_root() const {
823 return root;
824 }
825
826 // WARNING: Internal function exposed only for high-performance language bindings.
827 const mutable_string_view& _internal_get_input() const {
828 return input;
829 }
830
831 /// \endcond
832
833 private:
834 document(const document&) = delete;
835 void operator=(const document&) = delete;
836
837 explicit document(const mutable_string_view& input_, internal::ownership&& structure_, type root_type_, const size_t* root_)
838 : input(input_)
839 , structure(std::move(structure_))
840 , root_type(root_type_)
841 , root(root_)
842 , error_line(0)
843 , error_column(0)
844 , error_code(ERROR_NO_ERROR)
845 , error_arg(0)
846 {
847 formatted_error_message[0] = 0;
848 }
849
850 explicit document(const mutable_string_view& input_, size_t error_line_, size_t error_column_, const error error_code_, int error_arg_)
851 : input(input_)
852 , structure(0)
853 , root_type(TYPE_NULL)
854 , root(0)
855 , error_line(error_line_)
856 , error_column(error_column_)
857 , error_code(error_code_)
858 , error_arg(error_arg_)
859 {
860 formatted_error_message[ERROR_BUFFER_LENGTH - 1] = 0;
861 int written = has_significant_error_arg()
862 ? SAJSON_snprintf(formatted_error_message, ERROR_BUFFER_LENGTH - 1, "%s: %d", _internal_get_error_text(), error_arg)
863 : SAJSON_snprintf(formatted_error_message, ERROR_BUFFER_LENGTH - 1, "%s", _internal_get_error_text());
864 (void)written;
865 assert(written >= 0 && written < ERROR_BUFFER_LENGTH);
866 }
867
868 bool has_significant_error_arg() const {
869 return error_code == ERROR_ILLEGAL_CODEPOINT;
870 }
871
872 mutable_string_view input;
873 internal::ownership structure;
874 const type root_type;
875 const size_t* const root;
876 const size_t error_line;
877 const size_t error_column;
878 const error error_code;
879 const int error_arg;
880
881 enum { ERROR_BUFFER_LENGTH = 128 };
882 char formatted_error_message[ERROR_BUFFER_LENGTH];
883
884 template<typename AllocationStrategy, typename StringType>
885 friend document parse(const AllocationStrategy& strategy, const StringType& string);
886 template<typename Allocator>
887 friend class parser;
888 };
889
890 /// Allocation policy that allocates one large buffer guaranteed to hold the
891 /// resulting AST. This allocation policy is the fastest since it requires
892 /// no conditionals to see if more memory must be allocated.
893 class single_allocation {
894 public:
895 /// \cond INTERNAL
896
897 class stack_head {
898 public:
899 stack_head(stack_head&& other)
900 : stack_bottom(other.stack_bottom)
901 , stack_top(other.stack_top)
902 {}
903
904 bool push(size_t element) {
905 *stack_top++ = element;
906 return true;
907 }
908
909 size_t* reserve(size_t amount, bool* success) {
910 size_t* rv = stack_top;
911 stack_top += amount;
912 *success = true;
913 return rv;
914 }
915
916 // The compiler does not see the stack_head (stored in a local)
917 // and the allocator (stored as a field) have the same stack_bottom
918 // values, so it does a bit of redundant work.
919 // So there's a microoptimization available here: introduce a type
920 // "stack_mark" and make it polymorphic on the allocator. For
921 // single_allocation, it merely needs to be a single pointer.
922
923 void reset(size_t new_top) {
924 stack_top = stack_bottom + new_top;
925 }
926
927 size_t get_size() {
928 return stack_top - stack_bottom;
929 }
930
931 size_t* get_top() {
932 return stack_top;
933 }
934
935 size_t* get_pointer_from_offset(size_t offset) {
936 return stack_bottom + offset;
937 }
938
939 private:
940 stack_head() = delete;
941 stack_head(const stack_head&) = delete;
942 void operator=(const stack_head&) = delete;
943
944 explicit stack_head(size_t* base)
945 : stack_bottom(base)
946 , stack_top(base)
947 {}
948
949 size_t* const stack_bottom;
950 size_t* stack_top;
951
952 friend class single_allocation;
953 };
954
955 class allocator {
956 public:
957 allocator() = delete;
958 allocator(const allocator&) = delete;
959 void operator=(const allocator&) = delete;
960
961 explicit allocator(size_t* buffer, size_t input_size, bool should_deallocate_)
962 : structure(buffer)
963 , structure_end(buffer ? buffer + input_size : 0)
964 , write_cursor(structure_end)
965 , should_deallocate(should_deallocate_)
966 {}
967
968 explicit allocator(std::nullptr_t)
969 : structure(0)
970 , structure_end(0)
971 , write_cursor(0)
972 , should_deallocate(false)
973 {}
974
975 allocator(allocator&& other)
976 : structure(other.structure)
977 , structure_end(other.structure_end)
978 , write_cursor(other.write_cursor)
979 , should_deallocate(other.should_deallocate)
980 {
981 other.structure = 0;
982 other.structure_end = 0;
983 other.write_cursor = 0;
984 other.should_deallocate = false;
985 }
986
987 ~allocator() {
988 if (should_deallocate) {
989 delete[] structure;
990 }
991 }
992
993 stack_head get_stack_head(bool* success) {
994 *success = true;
995 return stack_head(structure);
996 }
997
998 size_t get_write_offset() {
999 return structure_end - write_cursor;
1000 }
1001
1002 size_t* get_write_pointer_of(size_t v) {
1003 return structure_end - v;
1004 }
1005
1006 size_t* reserve(size_t size, bool* success) {
1007 *success = true;
1008 write_cursor -= size;
1009 return write_cursor;
1010 }
1011
1012 size_t* get_ast_root() {
1013 return write_cursor;
1014 }
1015
1016 internal::ownership transfer_ownership() {
1017 auto p = structure;
1018 structure = 0;
1019 structure_end = 0;
1020 write_cursor = 0;
1021 if (should_deallocate) {
1022 return internal::ownership(p);
1023 } else {
1024 return internal::ownership(0);
1025 }
1026 }
1027
1028 private:
1029 size_t* structure;
1030 size_t* structure_end;
1031 size_t* write_cursor;
1032 bool should_deallocate;
1033 };
1034
1035 /// \endcond
1036
1037 /// Allocate a single worst-case AST buffer with one word per byte in
1038 /// the input document.
1039 single_allocation()
1040 : has_existing_buffer(false)
1041 , existing_buffer(0)
1042 , existing_buffer_size(0)
1043 {}
1044
1045 /// Write the AST into an existing buffer. Will fail with an out of
1046 /// memory error if the buffer is not guaranteed to be big enough for
1047 /// the document. The caller must guarantee the memory is valid for
1048 /// the duration of the parse and the AST traversal.
1049 single_allocation(size_t* existing_buffer_, size_t size_in_words)
1050 : has_existing_buffer(true)
1051 , existing_buffer(existing_buffer_)
1052 , existing_buffer_size(size_in_words)
1053 {}
1054
1055 /// Convenience wrapper for single_allocation(size_t*, size_t) that
1056 /// automatically infers the length of a given array.
1057 template<size_t N>
1058 explicit single_allocation(size_t (&existing_buffer_)[N])
1059 : single_allocation(existing_buffer_, N)
1060 {}
1061
1062 /// \cond INTERNAL
1063
1064 allocator make_allocator(size_t input_document_size_in_bytes, bool* succeeded) const {
1065 if (has_existing_buffer) {
1066 if (existing_buffer_size < input_document_size_in_bytes) {
1067 *succeeded = false;
1068 return allocator(nullptr);
1069 }
1070 *succeeded = true;
1071 return allocator(existing_buffer, input_document_size_in_bytes, false);
1072 } else {
1073 size_t* buffer = new(std::nothrow) size_t[input_document_size_in_bytes];
1074 if (!buffer) {
1075 *succeeded = false;
1076 return allocator(nullptr);
1077 }
1078 *succeeded = true;
1079 return allocator(buffer, input_document_size_in_bytes, true);
1080 }
1081 }
1082
1083 /// \endcond
1084
1085 private:
1086 bool has_existing_buffer;
1087 size_t* existing_buffer;
1088 size_t existing_buffer_size;
1089 };
1090
1091 /// Allocation policy that uses dynamically-growing buffers for both the
1092 /// parse stack and the AST. This allocation policy minimizes peak memory
1093 /// usage at the cost of some allocation and copying churn.
1094 class dynamic_allocation {
1095 public:
1096 /// \cond INTERNAL
1097
1098 class stack_head {
1099 public:
1100 stack_head(stack_head&& other)
1101 : stack_top(other.stack_top)
1102 , stack_bottom(other.stack_bottom)
1103 , stack_limit(other.stack_limit)
1104 {
1105 other.stack_top = 0;
1106 other.stack_bottom = 0;
1107 other.stack_limit = 0;
1108 }
1109
1110 ~stack_head() {
1111 delete[] stack_bottom;
1112 }
1113
1114 bool push(size_t element) {
1115 if (can_grow(1)) {
1116 *stack_top++ = element;
1117 return true;
1118 } else {
1119 return false;
1120 }
1121 }
1122
1123 size_t* reserve(size_t amount, bool* success) {
1124 if (can_grow(amount)) {
1125 size_t* rv = stack_top;
1126 stack_top += amount;
1127 *success = true;
1128 return rv;
1129 } else {
1130 *success = false;
1131 return 0;
1132 }
1133 }
1134
1135 void reset(size_t new_top) {
1136 stack_top = stack_bottom + new_top;
1137 }
1138
1139 size_t get_size() {
1140 return stack_top - stack_bottom;
1141 }
1142
1143 size_t* get_top() {
1144 return stack_top;
1145 }
1146
1147 size_t* get_pointer_from_offset(size_t offset) {
1148 return stack_bottom + offset;
1149 }
1150
1151 private:
1152 stack_head(const stack_head&) = delete;
1153 void operator=(const stack_head&) = delete;
1154
1155 explicit stack_head(size_t initial_capacity, bool* success) {
1156 assert(initial_capacity);
1157 stack_bottom = new(std::nothrow) size_t[initial_capacity];
1158 stack_top = stack_bottom;
1159 if (stack_bottom) {
1160 stack_limit = stack_bottom + initial_capacity;
1161 } else {
1162 stack_limit = 0;
1163 }
1164 *success = !!stack_bottom;
1165 }
1166
1167 bool can_grow(size_t amount) {
1168 if (SAJSON_LIKELY(amount <= static_cast<size_t>(stack_limit - stack_top))) {
1169 return true;
1170 }
1171
1172 size_t current_size = stack_top - stack_bottom;
1173 size_t old_capacity = stack_limit - stack_bottom;
1174 size_t new_capacity = old_capacity * 2;
1175 while (new_capacity < amount + current_size) {
1176 new_capacity *= 2;
1177 }
1178 size_t* new_stack = new(std::nothrow) size_t[new_capacity];
1179 if (!new_stack) {
1180 stack_top = 0;
1181 stack_bottom = 0;
1182 stack_limit = 0;
1183 return false;
1184 }
1185
1186 memcpy(new_stack, stack_bottom, current_size * sizeof(size_t));
1187 delete[] stack_bottom;
1188 stack_top = new_stack + current_size;
1189 stack_bottom = new_stack;
1190 stack_limit = stack_bottom + new_capacity;
1191 return true;
1192 }
1193
1194 size_t* stack_top; // stack grows up: stack_top >= stack_bottom
1195 size_t* stack_bottom;
1196 size_t* stack_limit;
1197
1198 friend class dynamic_allocation;
1199 };
1200
1201 class allocator {
1202 public:
1203 allocator() = delete;
1204 allocator(const allocator&) = delete;
1205 void operator=(const allocator&) = delete;
1206
1207 explicit allocator(size_t* buffer_, size_t current_capacity, size_t initial_stack_capacity_)
1208 : ast_buffer_bottom(buffer_)
1209 , ast_buffer_top(buffer_ + current_capacity)
1210 , ast_write_head(ast_buffer_top)
1211 , initial_stack_capacity(initial_stack_capacity_)
1212 {}
1213
1214 explicit allocator(std::nullptr_t)
1215 : ast_buffer_bottom(0)
1216 , ast_buffer_top(0)
1217 , ast_write_head(0)
1218 , initial_stack_capacity(0)
1219 {}
1220
1221 allocator(allocator&& other)
1222 : ast_buffer_bottom(other.ast_buffer_bottom)
1223 , ast_buffer_top(other.ast_buffer_top)
1224 , ast_write_head(other.ast_write_head)
1225 , initial_stack_capacity(other.initial_stack_capacity)
1226 {
1227 other.ast_buffer_bottom = 0;
1228 other.ast_buffer_top = 0;
1229 other.ast_write_head = 0;
1230 }
1231
1232 ~allocator() {
1233 delete[] ast_buffer_bottom;
1234 }
1235
1236 stack_head get_stack_head(bool* success) {
1237 return stack_head(initial_stack_capacity, success);
1238 }
1239
1240 size_t get_write_offset() {
1241 return ast_buffer_top - ast_write_head;
1242 }
1243
1244 size_t* get_write_pointer_of(size_t v) {
1245 return ast_buffer_top - v;
1246 }
1247
1248 size_t* reserve(size_t size, bool* success) {
1249 if (can_grow(size)) {
1250 ast_write_head -= size;
1251 *success = true;
1252 return ast_write_head;
1253 } else {
1254 *success = false;
1255 return 0;
1256 }
1257 }
1258
1259 size_t* get_ast_root() {
1260 return ast_write_head;
1261 }
1262
1263 internal::ownership transfer_ownership() {
1264 auto p = ast_buffer_bottom;
1265 ast_buffer_bottom = 0;
1266 ast_buffer_top = 0;
1267 ast_write_head = 0;
1268 return internal::ownership(p);
1269 }
1270
1271 private:
1272 bool can_grow(size_t amount) {
1273 if (SAJSON_LIKELY(amount <= static_cast<size_t>(ast_write_head - ast_buffer_bottom))) {
1274 return true;
1275 }
1276 size_t current_capacity = ast_buffer_top - ast_buffer_bottom;
1277
1278 size_t current_size = ast_buffer_top - ast_write_head;
1279 size_t new_capacity = current_capacity * 2;
1280 while (new_capacity < amount + current_size) {
1281 new_capacity *= 2;
1282 }
1283
1284 size_t* old_buffer = ast_buffer_bottom;
1285 size_t* new_buffer = new(std::nothrow) size_t[new_capacity];
1286 if (!new_buffer) {
1287 ast_buffer_bottom = 0;
1288 ast_buffer_top = 0;
1289 ast_write_head = 0;
1290 return false;
1291 }
1292
1293 size_t* old_write_head = ast_write_head;
1294 ast_buffer_bottom = new_buffer;
1295 ast_buffer_top = new_buffer + new_capacity;
1296 ast_write_head = ast_buffer_top - current_size;
1297 memcpy(ast_write_head, old_write_head, current_size * sizeof(size_t));
1298 delete[] old_buffer;
1299
1300 return true;
1301 }
1302
1303 size_t* ast_buffer_bottom; // base address of the ast buffer - it grows down
1304 size_t* ast_buffer_top;
1305 size_t* ast_write_head;
1306 size_t initial_stack_capacity;
1307 };
1308
1309 /// \endcond
1310
1311 /// Creates a dynamic_allocation policy with the given initial AST
1312 /// and stack buffer sizes.
1313 dynamic_allocation(size_t initial_ast_capacity_ = 0, size_t initial_stack_capacity_ = 0)
1314 : initial_ast_capacity(initial_ast_capacity_)
1315 , initial_stack_capacity(initial_stack_capacity_)
1316 {}
1317
1318 /// \cond INTERNAL
1319
1320 allocator make_allocator(size_t input_document_size_in_bytes, bool* succeeded) const {
1321 size_t capacity = initial_ast_capacity;
1322 if (!capacity) {
1323 // TODO: guess based on input document size
1324 capacity = 1024;
1325 }
1326
1327 size_t* buffer = new(std::nothrow) size_t[capacity];
1328 if (!buffer) {
1329 *succeeded = false;
1330 return allocator(nullptr);
1331 }
1332
1333 size_t stack_capacity = initial_stack_capacity;
1334 if (!stack_capacity) {
1335 stack_capacity = 256;
1336 }
1337
1338 *succeeded = true;
1339 return allocator(buffer, capacity, stack_capacity);
1340 }
1341
1342 /// \endcond
1343
1344 private:
1345 size_t initial_ast_capacity;
1346 size_t initial_stack_capacity;
1347 };
1348
1349 /// Allocation policy that attempts to fit the parsed AST into an existing
1350 /// memory buffer. This allocation policy is useful when using sajson in
1351 /// a zero-allocation context or when there are constraints on the amount
1352 // of memory that can be used.
1353 class bounded_allocation {
1354 public:
1355 /// \cond INTERNAL
1356
1357 class allocator;
1358
1359 class stack_head {
1360 public:
1361 stack_head(stack_head&& other)
1362 : source_allocator(other.source_allocator)
1363 {
1364 other.source_allocator = 0;
1365 }
1366
1367 bool push(size_t element) {
1368 if (SAJSON_LIKELY(source_allocator->can_grow(1))) {
1369 *(source_allocator->stack_top)++ = element;
1370 return true;
1371 } else {
1372 return false;
1373 }
1374 }
1375
1376 size_t* reserve(size_t amount, bool* success) {
1377 if (SAJSON_LIKELY(source_allocator->can_grow(amount))) {
1378 size_t* rv = source_allocator->stack_top;
1379 source_allocator->stack_top += amount;
1380 *success = true;
1381 return rv;
1382 } else {
1383 *success = false;
1384 return 0;
1385 }
1386 }
1387
1388 void reset(size_t new_top) {
1389 source_allocator->stack_top = source_allocator->structure + new_top;
1390 }
1391
1392 size_t get_size() {
1393 return source_allocator->stack_top - source_allocator->structure;
1394 }
1395
1396 size_t* get_top() {
1397 return source_allocator->stack_top;
1398 }
1399
1400 size_t* get_pointer_from_offset(size_t offset) {
1401 return source_allocator->structure + offset;
1402 }
1403
1404 private:
1405 stack_head(const stack_head&) = delete;
1406 void operator=(const stack_head&) = delete;
1407
1408 explicit stack_head(allocator* source_allocator_)
1409 : source_allocator(source_allocator_)
1410 {}
1411
1412 allocator* source_allocator;
1413
1414 friend class bounded_allocation;
1415 };
1416
1417 class allocator {
1418 public:
1419 allocator() = delete;
1420 allocator(const allocator&) = delete;
1421 void operator=(const allocator&) = delete;
1422
1423 explicit allocator(size_t* existing_buffer, size_t existing_buffer_size)
1424 : structure(existing_buffer)
1425 , structure_end(existing_buffer + existing_buffer_size)
1426 , write_cursor(structure_end)
1427 , stack_top(structure)
1428 {}
1429
1430 allocator(allocator&& other)
1431 : structure(other.structure)
1432 , structure_end(other.structure_end)
1433 , write_cursor(other.write_cursor)
1434 , stack_top(other.stack_top)
1435 {
1436 other.structure = 0;
1437 other.structure_end = 0;
1438 other.write_cursor = 0;
1439 other.stack_top = 0;
1440 }
1441
1442 stack_head get_stack_head(bool* success) {
1443 *success = true;
1444 return stack_head(this);
1445 }
1446
1447 size_t get_write_offset() {
1448 return structure_end - write_cursor;
1449 }
1450
1451 size_t* get_write_pointer_of(size_t v) {
1452 return structure_end - v;
1453 }
1454
1455 size_t* reserve(size_t size, bool* success) {
1456 if (can_grow(size)) {
1457 write_cursor -= size;
1458 *success = true;
1459 return write_cursor;
1460 } else {
1461 *success = false;
1462 return 0;
1463 }
1464 }
1465
1466 size_t* get_ast_root() {
1467 return write_cursor;
1468 }
1469
1470 internal::ownership transfer_ownership() {
1471 structure = 0;
1472 structure_end = 0;
1473 write_cursor = 0;
1474 return internal::ownership(0);
1475 }
1476
1477 private:
1478 bool can_grow(size_t amount) {
1479 // invariant: stack_top <= write_cursor
1480 // thus: write_cursor - stack_top is positive
1481 return static_cast<size_t>(write_cursor - stack_top) >= amount;
1482 }
1483
1484 size_t* structure;
1485 size_t* structure_end;
1486 size_t* write_cursor;
1487 size_t* stack_top;
1488
1489 friend class bounded_allocation;
1490 };
1491
1492 /// \endcond
1493
1494 /// Uses an existing buffer to hold the parsed AST, if it fits. The
1495 /// specified buffer must not be deallocated until after the document
1496 /// is parsed and the AST traversed.
1497 bounded_allocation(size_t* existing_buffer_, size_t size_in_words)
1498 : existing_buffer(existing_buffer_)
1499 , existing_buffer_size(size_in_words)
1500 {}
1501
1502 /// Convenience wrapper for bounded_allocation(size_t*, size) that
1503 /// automatically infers the size of the given array.
1504 template<size_t N>
1505 explicit bounded_allocation(size_t (&existing_buffer_)[N])
1506 : bounded_allocation(existing_buffer_, N)
1507 {}
1508
1509 /// \cond INTERNAL
1510
1511 allocator make_allocator(size_t input_document_size_in_bytes, bool* succeeded) const {
1512 *succeeded = true;
1513 return allocator(existing_buffer, existing_buffer_size);
1514 }
1515
1516 /// \endcond
1517
1518 private:
1519 size_t* existing_buffer;
1520 size_t existing_buffer_size;
1521 };
1522
1523 // I thought about putting parser in the internal namespace but I don't
1524 // want to indent it further...
1525 /// \cond INTERNAL
1526 template<typename Allocator>
1527 class parser {
1528 public:
1529 parser(const mutable_string_view& msv, Allocator&& allocator_)
1530 : input(msv)
1531 , input_end(input.get_data() + input.length())
1532 , allocator(std::move(allocator_))
1533 , root_type(TYPE_NULL)
1534 , error_line(0)
1535 , error_column(0)
1536 {}
1537
1538 document get_document() {
1539 if (parse()) {
1540 size_t* ast_root = allocator.get_ast_root();
1541 return document(input, allocator.transfer_ownership(), root_type, ast_root);
1542 } else {
1543 return document(input, error_line, error_column, error_code, error_arg);
1544 }
1545 }
1546
1547 private:
1548 struct error_result {
1549 operator bool() const {
1550 return false;
1551 }
1552 operator char*() const {
1553 return 0;
1554 }
1555 };
1556
1557 bool at_eof(const char* p) {
1558 return p == input_end;
1559 }
1560
1561 char* skip_whitespace(char* p) {
1562 // There is an opportunity to make better use of superscalar
1563 // hardware here* but if someone cares about JSON parsing
1564 // performance the first thing they do is minify, so prefer
1565 // to optimize for code size here.
1566 // * https://github.com/chadaustin/Web-Benchmarks/blob/master/json/third-party/pjson/pjson.h#L1873
1567 for (;;) {
1568 if (SAJSON_UNLIKELY(p == input_end)) {
1569 return 0;
1570 } else if (internal::is_whitespace(*p)) {
1571 ++p;
1572 } else {
1573 return p;
1574 }
1575 }
1576 }
1577
1578 error_result oom(char* p) {
1579 return make_error(p, ERROR_OUT_OF_MEMORY);
1580 }
1581
1582 error_result unexpected_end() {
1583 return make_error(0, ERROR_UNEXPECTED_END);
1584 }
1585
1586 error_result unexpected_end(char* p) {
1587 return make_error(p, ERROR_UNEXPECTED_END);
1588 }
1589
1590 error_result make_error(char* p, error code, int arg = 0) {
1591 if (!p) {
1592 p = input_end;
1593 }
1594
1595 error_line = 1;
1596 error_column = 1;
1597
1598 char* c = input.get_data();
1599 while (c < p) {
1600 if (*c == '\r') {
1601 if (c + 1 < p && c[1] == '\n') {
1602 ++error_line;
1603 error_column = 1;
1604 ++c;
1605 } else {
1606 ++error_line;
1607 error_column = 1;
1608 }
1609 } else if (*c == '\n') {
1610 ++error_line;
1611 error_column = 1;
1612 } else {
1613 // TODO: count UTF-8 characters
1614 ++error_column;
1615 }
1616 ++c;
1617 }
1618
1619 error_code = code;
1620 error_arg = arg;
1621 return error_result();
1622 }
1623
1624 bool parse() {
1625 using namespace internal;
1626
1627 // p points to the character currently being parsed
1628 char* p = input.get_data();
1629
1630 bool success;
1631 auto stack = allocator.get_stack_head(&success); //this is the new buffer of strategy
1632 if (SAJSON_UNLIKELY(!success)) {
1633 return oom(p);
1634 }
1635
1636 p = skip_whitespace(p);
1637 if (SAJSON_UNLIKELY(!p)) {
1638 return make_error(p, ERROR_MISSING_ROOT_ELEMENT);
1639 }
1640
1641 // current_base is an offset to the first element of the current structure (object or array)
1642 size_t current_base = stack.get_size();
1643 type current_structure_type;
1644 if (*p == '[') {
1645 current_structure_type = TYPE_ARRAY;
1646 bool s = stack.push(make_element(current_structure_type, ROOT_MARKER));
1647 if (SAJSON_UNLIKELY(!s)) {
1648 return oom(p);
1649 }
1650 goto array_close_or_element;
1651 } else if (*p == '{') {
1652 current_structure_type = TYPE_OBJECT;
1653 bool s = stack.push(make_element(current_structure_type, ROOT_MARKER));
1654 if (SAJSON_UNLIKELY(!s)) {
1655 return oom(p);
1656 }
1657 goto object_close_or_element;
1658 } else {
1659 return make_error(p, ERROR_BAD_ROOT);
1660 }
1661
1662 // BEGIN STATE MACHINE
1663
1664 size_t pop_element; // used as an argument into the `pop` routine
1665
1666 if (0) { // purely for structure
1667
1668 // ASSUMES: byte at p SHOULD be skipped
1669 array_close_or_element:
1670 p = skip_whitespace(p + 1);
1671 if (SAJSON_UNLIKELY(!p)) {
1672 return unexpected_end();
1673 }
1674 if (*p == ']') {
1675 goto pop_array;
1676 } else {
1677 goto next_element;
1678 }
1679 SAJSON_UNREACHABLE();
1680
1681 // ASSUMES: byte at p SHOULD be skipped
1682 object_close_or_element:
1683 p = skip_whitespace(p + 1);
1684 if (SAJSON_UNLIKELY(!p)) {
1685 return unexpected_end();
1686 }
1687 if (*p == '}') {
1688 goto pop_object;
1689 } else {
1690 goto object_key;
1691 }
1692 SAJSON_UNREACHABLE();
1693
1694 // ASSUMES: byte at p SHOULD NOT be skipped
1695 structure_close_or_comma:
1696 p = skip_whitespace(p);
1697 if (SAJSON_UNLIKELY(!p)) {
1698 return unexpected_end();
1699 }
1700
1701 if (current_structure_type == TYPE_ARRAY) {
1702 if (*p == ']') {
1703 goto pop_array;
1704 } else {
1705 if (SAJSON_UNLIKELY(*p != ',')) {
1706 return make_error(p, ERROR_EXPECTED_COMMA);
1707 }
1708 ++p;
1709 goto next_element;
1710 }
1711 } else {
1712 assert(current_structure_type == TYPE_OBJECT);
1713 if (*p == '}') {
1714 goto pop_object;
1715 } else {
1716 if (SAJSON_UNLIKELY(*p != ',')) {
1717 return make_error(p, ERROR_EXPECTED_COMMA);
1718 }
1719 ++p;
1720 goto object_key;
1721 }
1722 }
1723 SAJSON_UNREACHABLE();
1724
1725 // ASSUMES: *p == '}'
1726 pop_object: {
1727 ++p;
1728 size_t* base_ptr = stack.get_pointer_from_offset(current_base);
1729 pop_element = *base_ptr;
1730 if (SAJSON_UNLIKELY(!install_object(base_ptr + 1, stack.get_top()))) {
1731 return oom(p);
1732 }
1733 goto pop;
1734 }
1735
1736 // ASSUMES: *p == ']'
1737 pop_array: {
1738 ++p;
1739 size_t* base_ptr = stack.get_pointer_from_offset(current_base);
1740 pop_element = *base_ptr;
1741 if (SAJSON_UNLIKELY(!install_array(base_ptr + 1, stack.get_top()))) {
1742 return oom(p);
1743 }
1744 goto pop;
1745 }
1746
1747 // ASSUMES: byte at p SHOULD NOT be skipped
1748 object_key: {
1749 p = skip_whitespace(p);
1750 if (SAJSON_UNLIKELY(!p)) {
1751 return unexpected_end();
1752 }
1753 if (SAJSON_UNLIKELY(*p != '"')) {
1754 return make_error(p, ERROR_MISSING_OBJECT_KEY);
1755 }
1756 bool success_;
1757 size_t* out = stack.reserve(2, &success_);
1758 if (SAJSON_UNLIKELY(!success_)) {
1759 return oom(p);
1760 }
1761 p = parse_string(p, out);
1762 if (SAJSON_UNLIKELY(!p)) {
1763 return false;
1764 }
1765 p = skip_whitespace(p);
1766 if (SAJSON_UNLIKELY(!p || *p != ':')) {
1767 return make_error(p, ERROR_EXPECTED_COLON);
1768 }
1769 ++p;
1770 goto next_element;
1771 }
1772
1773 // ASSUMES: byte at p SHOULD NOT be skipped
1774 next_element:
1775 p = skip_whitespace(p);
1776 if (SAJSON_UNLIKELY(!p)) {
1777 return unexpected_end();
1778 }
1779
1780 type value_type_result;
1781 switch (*p) {
1782 case 0:
1783 return unexpected_end(p);
1784 case 'n':
1785 p = parse_null(p);
1786 if (!p) {
1787 return false;
1788 }
1789 value_type_result = TYPE_NULL;
1790 break;
1791 case 'f':
1792 p = parse_false(p);
1793 if (!p) {
1794 return false;
1795 }
1796 value_type_result = TYPE_FALSE;
1797 break;
1798 case 't':
1799 p = parse_true(p);
1800 if (!p) {
1801 return false;
1802 }
1803 value_type_result = TYPE_TRUE;
1804 break;
1805 case '0':
1806 case '1':
1807 case '2':
1808 case '3':
1809 case '4':
1810 case '5':
1811 case '6':
1812 case '7':
1813 case '8':
1814 case '9':
1815 case '-': {
1816 auto result = parse_number(p);
1817 p = result.first;
1818 if (!p) {
1819 return false;
1820 }
1821 value_type_result = result.second;
1822 break;
1823 }
1824 case '"': {
1825 bool success_;
1826 size_t* string_tag = allocator.reserve(2, &success_);
1827 if (SAJSON_UNLIKELY(!success_)) {
1828 return oom(p);
1829 }
1830 p = parse_string(p, string_tag);
1831 if (!p) {
1832 return false;
1833 }
1834 value_type_result = TYPE_STRING;
1835 break;
1836 }
1837
1838 case '[': {
1839 size_t previous_base = current_base;
1840 current_base = stack.get_size();
1841 bool s = stack.push(make_element(current_structure_type, previous_base));
1842 if (SAJSON_UNLIKELY(!s)) {
1843 return oom(p);
1844 }
1845 current_structure_type = TYPE_ARRAY;
1846 goto array_close_or_element;
1847 }
1848 case '{': {
1849 size_t previous_base = current_base;
1850 current_base = stack.get_size();
1851 bool s = stack.push(make_element(current_structure_type, previous_base));
1852 if (SAJSON_UNLIKELY(!s)) {
1853 return oom(p);
1854 }
1855 current_structure_type = TYPE_OBJECT;
1856 goto object_close_or_element;
1857 }
1858 pop: {
1859 size_t parent = get_element_value(pop_element);
1860 if (parent == ROOT_MARKER) {
1861 root_type = current_structure_type;
1862 p = skip_whitespace(p);
1863 if (SAJSON_UNLIKELY(p)) {
1864 return make_error(p, ERROR_EXPECTED_END_OF_INPUT);
1865 }
1866 return true;
1867 }
1868 stack.reset(current_base);
1869 current_base = parent;
1870 value_type_result = current_structure_type;
1871 current_structure_type = get_element_type(pop_element);
1872 break;
1873 }
1874
1875 case ',':
1876 return make_error(p, ERROR_UNEXPECTED_COMMA);
1877 default:
1878 return make_error(p, ERROR_EXPECTED_VALUE);
1879 }
1880
1881 bool s = stack.push(make_element(
1882 value_type_result,
1883 allocator.get_write_offset()));
1884 if (SAJSON_UNLIKELY(!s)) {
1885 return oom(p);
1886 }
1887
1888 goto structure_close_or_comma;
1889 }
1890
1891 SAJSON_UNREACHABLE();
1892 }
1893
1894 bool has_remaining_characters(char* p, ptrdiff_t remaining) {
1895 return input_end - p >= remaining;
1896 }
1897
1898 char* parse_null(char* p) {
1899 if (SAJSON_UNLIKELY(!has_remaining_characters(p, 4))) {
1900 make_error(p, ERROR_UNEXPECTED_END);
1901 return 0;
1902 }
1903 char p1 = p[1];
1904 char p2 = p[2];
1905 char p3 = p[3];
1906 if (SAJSON_UNLIKELY(p1 != 'u' || p2 != 'l' || p3 != 'l')) {
1907 make_error(p, ERROR_EXPECTED_NULL);
1908 return 0;
1909 }
1910 return p + 4;
1911 }
1912
1913 char* parse_false(char* p) {
1914 if (SAJSON_UNLIKELY(!has_remaining_characters(p, 5))) {
1915 return make_error(p, ERROR_UNEXPECTED_END);
1916 }
1917 char p1 = p[1];
1918 char p2 = p[2];
1919 char p3 = p[3];
1920 char p4 = p[4];
1921 if (SAJSON_UNLIKELY(p1 != 'a' || p2 != 'l' || p3 != 's' || p4 != 'e')) {
1922 return make_error(p, ERROR_EXPECTED_FALSE);
1923 }
1924 return p + 5;
1925 }
1926
1927 char* parse_true(char* p) {
1928 if (SAJSON_UNLIKELY(!has_remaining_characters(p, 4))) {
1929 return make_error(p, ERROR_UNEXPECTED_END);
1930 }
1931 char p1 = p[1];
1932 char p2 = p[2];
1933 char p3 = p[3];
1934 if (SAJSON_UNLIKELY(p1 != 'r' || p2 != 'u' || p3 != 'e')) {
1935 return make_error(p, ERROR_EXPECTED_TRUE);
1936 }
1937 return p + 4;
1938 }
1939
1940 static double pow10(int64_t exponent) {
1941 if (SAJSON_UNLIKELY(exponent > 308)) {
1942 return std::numeric_limits<double>::infinity();
1943 } else if (SAJSON_UNLIKELY(exponent < -323)) {
1944 return 0.0;
1945 }
1946 static const double constants[] = {
1947 1e-323,1e-322,1e-321,1e-320,1e-319,1e-318,1e-317,1e-316,1e-315,1e-314,
1948 1e-313,1e-312,1e-311,1e-310,1e-309,1e-308,1e-307,1e-306,1e-305,1e-304,
1949 1e-303,1e-302,1e-301,1e-300,1e-299,1e-298,1e-297,1e-296,1e-295,1e-294,
1950 1e-293,1e-292,1e-291,1e-290,1e-289,1e-288,1e-287,1e-286,1e-285,1e-284,
1951 1e-283,1e-282,1e-281,1e-280,1e-279,1e-278,1e-277,1e-276,1e-275,1e-274,
1952 1e-273,1e-272,1e-271,1e-270,1e-269,1e-268,1e-267,1e-266,1e-265,1e-264,
1953 1e-263,1e-262,1e-261,1e-260,1e-259,1e-258,1e-257,1e-256,1e-255,1e-254,
1954 1e-253,1e-252,1e-251,1e-250,1e-249,1e-248,1e-247,1e-246,1e-245,1e-244,
1955 1e-243,1e-242,1e-241,1e-240,1e-239,1e-238,1e-237,1e-236,1e-235,1e-234,
1956 1e-233,1e-232,1e-231,1e-230,1e-229,1e-228,1e-227,1e-226,1e-225,1e-224,
1957 1e-223,1e-222,1e-221,1e-220,1e-219,1e-218,1e-217,1e-216,1e-215,1e-214,
1958 1e-213,1e-212,1e-211,1e-210,1e-209,1e-208,1e-207,1e-206,1e-205,1e-204,
1959 1e-203,1e-202,1e-201,1e-200,1e-199,1e-198,1e-197,1e-196,1e-195,1e-194,
1960 1e-193,1e-192,1e-191,1e-190,1e-189,1e-188,1e-187,1e-186,1e-185,1e-184,
1961 1e-183,1e-182,1e-181,1e-180,1e-179,1e-178,1e-177,1e-176,1e-175,1e-174,
1962 1e-173,1e-172,1e-171,1e-170,1e-169,1e-168,1e-167,1e-166,1e-165,1e-164,
1963 1e-163,1e-162,1e-161,1e-160,1e-159,1e-158,1e-157,1e-156,1e-155,1e-154,
1964 1e-153,1e-152,1e-151,1e-150,1e-149,1e-148,1e-147,1e-146,1e-145,1e-144,
1965 1e-143,1e-142,1e-141,1e-140,1e-139,1e-138,1e-137,1e-136,1e-135,1e-134,
1966 1e-133,1e-132,1e-131,1e-130,1e-129,1e-128,1e-127,1e-126,1e-125,1e-124,
1967 1e-123,1e-122,1e-121,1e-120,1e-119,1e-118,1e-117,1e-116,1e-115,1e-114,
1968 1e-113,1e-112,1e-111,1e-110,1e-109,1e-108,1e-107,1e-106,1e-105,1e-104,
1969 1e-103,1e-102,1e-101,1e-100,1e-99,1e-98,1e-97,1e-96,1e-95,1e-94,1e-93,
1970 1e-92,1e-91,1e-90,1e-89,1e-88,1e-87,1e-86,1e-85,1e-84,1e-83,1e-82,1e-81,
1971 1e-80,1e-79,1e-78,1e-77,1e-76,1e-75,1e-74,1e-73,1e-72,1e-71,1e-70,1e-69,
1972 1e-68,1e-67,1e-66,1e-65,1e-64,1e-63,1e-62,1e-61,1e-60,1e-59,1e-58,1e-57,
1973 1e-56,1e-55,1e-54,1e-53,1e-52,1e-51,1e-50,1e-49,1e-48,1e-47,1e-46,1e-45,
1974 1e-44,1e-43,1e-42,1e-41,1e-40,1e-39,1e-38,1e-37,1e-36,1e-35,1e-34,1e-33,
1975 1e-32,1e-31,1e-30,1e-29,1e-28,1e-27,1e-26,1e-25,1e-24,1e-23,1e-22,1e-21,
1976 1e-20,1e-19,1e-18,1e-17,1e-16,1e-15,1e-14,1e-13,1e-12,1e-11,1e-10,1e-9,
1977 1e-8,1e-7,1e-6,1e-5,1e-4,1e-3,1e-2,1e-1,1e0,1e1,1e2,1e3,1e4,1e5,1e6,1e7,
1978 1e8,1e9,1e10,1e11,1e12,1e13,1e14,1e15,1e16,1e17,1e18,1e19,1e20,1e21,
1979 1e22,1e23,1e24,1e25,1e26,1e27,1e28,1e29,1e30,1e31,1e32,1e33,1e34,1e35,
1980 1e36,1e37,1e38,1e39,1e40,1e41,1e42,1e43,1e44,1e45,1e46,1e47,1e48,1e49,
1981 1e50,1e51,1e52,1e53,1e54,1e55,1e56,1e57,1e58,1e59,1e60,1e61,1e62,1e63,
1982 1e64,1e65,1e66,1e67,1e68,1e69,1e70,1e71,1e72,1e73,1e74,1e75,1e76,1e77,
1983 1e78,1e79,1e80,1e81,1e82,1e83,1e84,1e85,1e86,1e87,1e88,1e89,1e90,1e91,
1984 1e92,1e93,1e94,1e95,1e96,1e97,1e98,1e99,1e100,1e101,1e102,1e103,1e104,
1985 1e105,1e106,1e107,1e108,1e109,1e110,1e111,1e112,1e113,1e114,1e115,1e116,
1986 1e117,1e118,1e119,1e120,1e121,1e122,1e123,1e124,1e125,1e126,1e127,1e128,
1987 1e129,1e130,1e131,1e132,1e133,1e134,1e135,1e136,1e137,1e138,1e139,1e140,
1988 1e141,1e142,1e143,1e144,1e145,1e146,1e147,1e148,1e149,1e150,1e151,1e152,
1989 1e153,1e154,1e155,1e156,1e157,1e158,1e159,1e160,1e161,1e162,1e163,1e164,
1990 1e165,1e166,1e167,1e168,1e169,1e170,1e171,1e172,1e173,1e174,1e175,1e176,
1991 1e177,1e178,1e179,1e180,1e181,1e182,1e183,1e184,1e185,1e186,1e187,1e188,
1992 1e189,1e190,1e191,1e192,1e193,1e194,1e195,1e196,1e197,1e198,1e199,1e200,
1993 1e201,1e202,1e203,1e204,1e205,1e206,1e207,1e208,1e209,1e210,1e211,1e212,
1994 1e213,1e214,1e215,1e216,1e217,1e218,1e219,1e220,1e221,1e222,1e223,1e224,
1995 1e225,1e226,1e227,1e228,1e229,1e230,1e231,1e232,1e233,1e234,1e235,1e236,
1996 1e237,1e238,1e239,1e240,1e241,1e242,1e243,1e244,1e245,1e246,1e247,1e248,
1997 1e249,1e250,1e251,1e252,1e253,1e254,1e255,1e256,1e257,1e258,1e259,1e260,
1998 1e261,1e262,1e263,1e264,1e265,1e266,1e267,1e268,1e269,1e270,1e271,1e272,
1999 1e273,1e274,1e275,1e276,1e277,1e278,1e279,1e280,1e281,1e282,1e283,1e284,
2000 1e285,1e286,1e287,1e288,1e289,1e290,1e291,1e292,1e293,1e294,1e295,1e296,
2001 1e297,1e298,1e299,1e300,1e301,1e302,1e303,1e304,1e305,1e306,1e307,1e308
2002 };
2003 return constants[exponent + 323];
2004 }
2005
2006 std::pair<char*, type> parse_number(char* p) {
2007 bool negative = false;
2008 if ('-' == *p) {
2009 ++p;
2010 negative = true;
2011
2012 if (SAJSON_UNLIKELY(at_eof(p))) {
2013 return std::make_pair(make_error(p, ERROR_UNEXPECTED_END), TYPE_NULL);
2014 }
2015 }
2016
2017 bool try_double = false;
2018
2019 int i = 0;
2020 double d = 0.0; // gcc complains that d might be used uninitialized which isn't true. appease the warning anyway.
2021 if (*p == '0') {
2022 ++p;
2023 if (SAJSON_UNLIKELY(at_eof(p))) {
2024 return std::make_pair(make_error(p, ERROR_UNEXPECTED_END), TYPE_NULL);
2025 }
2026 } else {
2027 unsigned char c = *p;
2028 if (c < '0' || c > '9') {
2029 return std::make_pair(make_error(p, ERROR_INVALID_NUMBER), TYPE_NULL);
2030 }
2031
2032 do {
2033 ++p;
2034 if (SAJSON_UNLIKELY(at_eof(p))) {
2035 return std::make_pair(make_error(p, ERROR_UNEXPECTED_END), TYPE_NULL);
2036 }
2037
2038 unsigned char digit = c - '0';
2039
2040 if (SAJSON_UNLIKELY(!try_double && i > INT_MAX / 10 - 9)) {
2041 // TODO: could split this into two loops
2042 try_double = true;
2043 d = i;
2044 }
2045 if (SAJSON_UNLIKELY(try_double)) {
2046 d = 10.0 * d + digit;
2047 } else {
2048 i = 10 * i + digit;
2049 }
2050
2051 c = *p;
2052 } while (c >= '0' && c <= '9');
2053 }
2054
2055 int64_t exponent = 0;
2056
2057 if ('.' == *p) {
2058 if (!try_double) {
2059 try_double = true;
2060 d = i;
2061 }
2062 ++p;
2063 if (SAJSON_UNLIKELY(at_eof(p))) {
2064 return std::make_pair(make_error(p, ERROR_UNEXPECTED_END), TYPE_NULL);
2065 }
2066 char c = *p;
2067 if (c < '0' || c > '9') {
2068 return std::make_pair(make_error(p, ERROR_INVALID_NUMBER), TYPE_NULL);
2069 }
2070
2071 do {
2072 ++p;
2073 if (SAJSON_UNLIKELY(at_eof(p))) {
2074 return std::make_pair(make_error(p, ERROR_UNEXPECTED_END), TYPE_NULL);
2075 }
2076 d = d * 10 + (c - '0');
2077 // One option to avoid underflow would be to clamp
2078 // to INT_MIN, but int64 subtraction is cheap and
2079 // in the absurd case of parsing 2 GB of digits
2080 // with an extremely high exponent, this will
2081 // produce accurate results. Instead, we just
2082 // leave exponent as int64_t and it will never
2083 // underflow.
2084 --exponent;
2085
2086 c = *p;
2087 } while (c >= '0' && c <= '9');
2088 }
2089
2090 char e = *p;
2091 if ('e' == e || 'E' == e) {
2092 if (!try_double) {
2093 try_double = true;
2094 d = i;
2095 }
2096 ++p;
2097 if (SAJSON_UNLIKELY(at_eof(p))) {
2098 return std::make_pair(make_error(p, ERROR_UNEXPECTED_END), TYPE_NULL);
2099 }
2100
2101 bool negativeExponent = false;
2102 if ('-' == *p) {
2103 negativeExponent = true;
2104 ++p;
2105 if (SAJSON_UNLIKELY(at_eof(p))) {
2106 return std::make_pair(make_error(p, ERROR_UNEXPECTED_END), TYPE_NULL);
2107 }
2108 } else if ('+' == *p) {
2109 ++p;
2110 if (SAJSON_UNLIKELY(at_eof(p))) {
2111 return std::make_pair(make_error(p, ERROR_UNEXPECTED_END), TYPE_NULL);
2112 }
2113 }
2114
2115 int exp = 0;
2116
2117 char c = *p;
2118 if (SAJSON_UNLIKELY(c < '0' || c > '9')) {
2119 return std::make_pair(make_error(p, ERROR_MISSING_EXPONENT), TYPE_NULL);
2120 }
2121 for (;;) {
2122 // c guaranteed to be between '0' and '9', inclusive
2123 unsigned char digit = c - '0';
2124 if (exp > (INT_MAX - digit) / 10) {
2125 // The exponent overflowed. Keep parsing, but
2126 // it will definitely be out of range when
2127 // pow10 is called.
2128 exp = INT_MAX;
2129 } else {
2130 exp = 10 * exp + digit;
2131 }
2132
2133 ++p;
2134 if (SAJSON_UNLIKELY(at_eof(p))) {
2135 return std::make_pair(make_error(p, ERROR_UNEXPECTED_END), TYPE_NULL);
2136 }
2137
2138 c = *p;
2139 if (c < '0' || c > '9') {
2140 break;
2141 }
2142 }
2143 static_assert(-INT_MAX >= INT_MIN, "exp can be negated without loss or UB");
2144 exponent += (negativeExponent ? -exp : exp);
2145 }
2146
2147 if (exponent) {
2148 assert(try_double);
2149 // If d is zero but the exponent is huge, don't
2150 // multiply zero by inf which gives nan.
2151 if (d != 0.0) {
2152 d *= pow10(exponent);
2153 }
2154 }
2155
2156 if (negative) {
2157 if (try_double) {
2158 d = -d;
2159 } else {
2160 i = -i;
2161 }
2162 }
2163 if (try_double) {
2164 bool success;
2165 size_t* out = allocator.reserve(double_storage::word_length, &success);
2166 if (SAJSON_UNLIKELY(!success)) {
2167 return std::make_pair(oom(p), TYPE_NULL);
2168 }
2169 double_storage::store(out, d);
2170 return std::make_pair(p, TYPE_DOUBLE);
2171 } else {
2172 bool success;
2173 size_t* out = allocator.reserve(integer_storage::word_length, &success);
2174 if (SAJSON_UNLIKELY(!success)) {
2175 return std::make_pair(oom(p), TYPE_NULL);
2176 }
2177 integer_storage::store(out, i);
2178 return std::make_pair(p, TYPE_INTEGER);
2179 }
2180 }
2181
2182 bool install_array(size_t* array_base, size_t* array_end) {
2183 using namespace sajson::internal;
2184
2185 const size_t length = array_end - array_base;
2186 bool success;
2187 size_t* const new_base = allocator.reserve(length + 1, &success);
2188 if (SAJSON_UNLIKELY(!success)) {
2189 return false;
2190 }
2191 size_t* out = new_base + length + 1;
2192 size_t* const structure_end = allocator.get_write_pointer_of(0);
2193
2194 while (array_end > array_base) {
2195 size_t element = *--array_end;
2196 type element_type = get_element_type(element);
2197 size_t element_value = get_element_value(element);
2198 size_t* element_ptr = structure_end - element_value;
2199 *--out = make_element(element_type, element_ptr - new_base);
2200 }
2201 *--out = length;
2202 return true;
2203 }
2204
2205 bool install_object(size_t* object_base, size_t* object_end) {
2206 using namespace internal;
2207
2208 assert((object_end - object_base) % 3 == 0);
2209 const size_t length_times_3 = object_end - object_base;
2210#ifndef SAJSON_UNSORTED_OBJECT_KEYS
2211 std::sort(
2212 reinterpret_cast<object_key_record*>(object_base),
2213 reinterpret_cast<object_key_record*>(object_end),
2214 object_key_comparator(input.get_data()));
2215#endif
2216
2217 bool success;
2218 size_t* const new_base = allocator.reserve(length_times_3 + 1, &success);
2219 if (SAJSON_UNLIKELY(!success)) {
2220 return false;
2221 }
2222 size_t* out = new_base + length_times_3 + 1;
2223 size_t* const structure_end = allocator.get_write_pointer_of(0);
2224
2225 while (object_end > object_base) {
2226 size_t element = *--object_end;
2227 type element_type = get_element_type(element);
2228 size_t element_value = get_element_value(element);
2229 size_t* element_ptr = structure_end - element_value;
2230
2231 *--out = make_element(element_type, element_ptr - new_base);
2232 *--out = *--object_end;
2233 *--out = *--object_end;
2234 }
2235 *--out = length_times_3 / 3;
2236 return true;
2237 }
2238
2239 char* parse_string(char* p, size_t* tag) {
2240 using namespace internal;
2241
2242 ++p; // "
2243 size_t start = p - input.get_data();
2244 char* input_end_local = input_end;
2245 while (input_end_local - p >= 4) {
2246 if (!is_plain_string_character(p[0])) { goto found; }
2247 if (!is_plain_string_character(p[1])) { p += 1; goto found; }
2248 if (!is_plain_string_character(p[2])) { p += 2; goto found; }
2249 if (!is_plain_string_character(p[3])) { p += 3; goto found; }
2250 p += 4;
2251 }
2252 for (;;) {
2253 if (SAJSON_UNLIKELY(p >= input_end_local)) {
2254 return make_error(p, ERROR_UNEXPECTED_END);
2255 }
2256
2257 if (!is_plain_string_character(*p)) {
2258 break;
2259 }
2260
2261 ++p;
2262 }
2263 found:
2264 if (SAJSON_LIKELY(*p == '"')) {
2265 tag[0] = start;
2266 tag[1] = p - input.get_data();
2267 *p = '\0';
2268 return p + 1;
2269 }
2270
2271 if (*p >= 0 && *p < 0x20) {
2272 return make_error(p, ERROR_ILLEGAL_CODEPOINT, static_cast<int>(*p));
2273 } else {
2274 // backslash or >0x7f
2275 return parse_string_slow(p, tag, start);
2276 }
2277 }
2278
2279 char* read_hex(char* p, unsigned& u) {
2280 unsigned v = 0;
2281 int i = 4;
2282 while (i--) {
2283 unsigned char c = *p++;
2284 if (c >= '0' && c <= '9') {
2285 c -= '0';
2286 } else if (c >= 'a' && c <= 'f') {
2287 c = c - 'a' + 10;
2288 } else if (c >= 'A' && c <= 'F') {
2289 c = c - 'A' + 10;
2290 } else {
2291 return make_error(p, ERROR_INVALID_UNICODE_ESCAPE);
2292 }
2293 v = (v << 4) + c;
2294 }
2295
2296 u = v;
2297 return p;
2298 }
2299
2300 void write_utf8(unsigned codepoint, char*& end) {
2301 if (codepoint < 0x80) {
2302 *end++ = codepoint;
2303 } else if (codepoint < 0x800) {
2304 *end++ = 0xC0 | (codepoint >> 6);
2305 *end++ = 0x80 | (codepoint & 0x3F);
2306 } else if (codepoint < 0x10000) {
2307 *end++ = 0xE0 | (codepoint >> 12);
2308 *end++ = 0x80 | ((codepoint >> 6) & 0x3F);
2309 *end++ = 0x80 | (codepoint & 0x3F);
2310 } else {
2311 assert(codepoint < 0x200000);
2312 *end++ = 0xF0 | (codepoint >> 18);
2313 *end++ = 0x80 | ((codepoint >> 12) & 0x3F);
2314 *end++ = 0x80 | ((codepoint >> 6) & 0x3F);
2315 *end++ = 0x80 | (codepoint & 0x3F);
2316 }
2317 }
2318
2319 char* parse_string_slow(char* p, size_t* tag, size_t start) {
2320 char* end = p;
2321 char* input_end_local = input_end;
2322
2323 for (;;) {
2324 if (SAJSON_UNLIKELY(p >= input_end_local)) {
2325 return make_error(p, ERROR_UNEXPECTED_END);
2326 }
2327
2328 if (SAJSON_UNLIKELY(*p >= 0 && *p < 0x20)) {
2329 return make_error(p, ERROR_ILLEGAL_CODEPOINT, static_cast<int>(*p));
2330 }
2331
2332 switch (*p) {
2333 case '"':
2334 tag[0] = start;
2335 tag[1] = end - input.get_data();
2336 *end = '\0';
2337 return p + 1;
2338
2339 case '\\':
2340 ++p;
2341 if (SAJSON_UNLIKELY(p >= input_end_local)) {
2342 return make_error(p, ERROR_UNEXPECTED_END);
2343 }
2344
2345 char replacement;
2346 switch (*p) {
2347 case '"': replacement = '"'; goto replace;
2348 case '\\': replacement = '\\'; goto replace;
2349 case '/': replacement = '/'; goto replace;
2350 case 'b': replacement = '\b'; goto replace;
2351 case 'f': replacement = '\f'; goto replace;
2352 case 'n': replacement = '\n'; goto replace;
2353 case 'r': replacement = '\r'; goto replace;
2354 case 't': replacement = '\t'; goto replace;
2355 replace:
2356 *end++ = replacement;
2357 ++p;
2358 break;
2359 case 'u': {
2360 ++p;
2361 if (SAJSON_UNLIKELY(!has_remaining_characters(p, 4))) {
2362 return make_error(p, ERROR_UNEXPECTED_END);
2363 }
2364 unsigned u = 0; // gcc's complaining that this could be used uninitialized. wrong.
2365 p = read_hex(p, u);
2366 if (!p) {
2367 return 0;
2368 }
2369 if (u >= 0xD800 && u <= 0xDBFF) {
2370 if (SAJSON_UNLIKELY(!has_remaining_characters(p, 6))) {
2371 return make_error(p, ERROR_UNEXPECTED_END_OF_UTF16);
2372 }
2373 char p0 = p[0];
2374 char p1 = p[1];
2375 if (p0 != '\\' || p1 != 'u') {
2376 return make_error(p, ERROR_EXPECTED_U);
2377 }
2378 p += 2;
2379 unsigned v = 0; // gcc's complaining that this could be used uninitialized. wrong.
2380 p = read_hex(p, v);
2381 if (!p) {
2382 return p;
2383 }
2384
2385 if (v < 0xDC00 || v > 0xDFFF) {
2386 return make_error(p, ERROR_INVALID_UTF16_TRAIL_SURROGATE);
2387 }
2388 u = 0x10000 + (((u - 0xD800) << 10) | (v - 0xDC00));
2389 }
2390 write_utf8(u, end);
2391 break;
2392 }
2393 default:
2394 return make_error(p, ERROR_UNKNOWN_ESCAPE);
2395 }
2396 break;
2397
2398 default:
2399 // validate UTF-8
2400 unsigned char c0 = p[0];
2401 if (c0 < 128) {
2402 *end++ = *p++;
2403 } else if (c0 < 224) {
2404 if (SAJSON_UNLIKELY(!has_remaining_characters(p, 2))) {
2405 return unexpected_end(p);
2406 }
2407 unsigned char c1 = p[1];
2408 if (c1 < 128 || c1 >= 192) {
2409 return make_error(p + 1, ERROR_INVALID_UTF8);
2410 }
2411 end[0] = c0;
2412 end[1] = c1;
2413 end += 2;
2414 p += 2;
2415 } else if (c0 < 240) {
2416 if (SAJSON_UNLIKELY(!has_remaining_characters(p, 3))) {
2417 return unexpected_end(p);
2418 }
2419 unsigned char c1 = p[1];
2420 if (c1 < 128 || c1 >= 192) {
2421 return make_error(p + 1, ERROR_INVALID_UTF8);
2422 }
2423 unsigned char c2 = p[2];
2424 if (c2 < 128 || c2 >= 192) {
2425 return make_error(p + 2, ERROR_INVALID_UTF8);
2426 }
2427 end[0] = c0;
2428 end[1] = c1;
2429 end[2] = c2;
2430 end += 3;
2431 p += 3;
2432 } else if (c0 < 248) {
2433 if (SAJSON_UNLIKELY(!has_remaining_characters(p, 4))) {
2434 return unexpected_end(p);
2435 }
2436 unsigned char c1 = p[1];
2437 if (c1 < 128 || c1 >= 192) {
2438 return make_error(p + 1, ERROR_INVALID_UTF8);
2439 }
2440 unsigned char c2 = p[2];
2441 if (c2 < 128 || c2 >= 192) {
2442 return make_error(p + 2, ERROR_INVALID_UTF8);
2443 }
2444 unsigned char c3 = p[3];
2445 if (c3 < 128 || c3 >= 192) {
2446 return make_error(p + 3, ERROR_INVALID_UTF8);
2447 }
2448 end[0] = c0;
2449 end[1] = c1;
2450 end[2] = c2;
2451 end[3] = c3;
2452 end += 4;
2453 p += 4;
2454 } else {
2455 return make_error(p, ERROR_INVALID_UTF8);
2456 }
2457 break;
2458 }
2459 }
2460 }
2461
2462 mutable_string_view input;
2463 char* const input_end;
2464 Allocator allocator;
2465
2466 type root_type;
2467 size_t error_line;
2468 size_t error_column;
2469 error error_code;
2470 int error_arg; // optional argument for the error
2471 };
2472 /// \endcond
2473
2474 /**
2475 * Parses a string of JSON bytes into a \ref document, given an allocation
2476 * strategy instance. Any kind of string type is valid as long as a
2477 * mutable_string_view can be constructed from it.
2478 *
2479 * Valid allocation strategies are \ref single_allocation,
2480 * \ref dynamic_allocation, and \ref bounded_allocation.
2481 *
2482 * A \ref document is returned whether or not the parse succeeds: success
2483 * state is available by calling document::is_valid().
2484 */
2485 template<typename AllocationStrategy, typename StringType>
2486 document parse(const AllocationStrategy& strategy, const StringType& string) {
2487 mutable_string_view input(string);
2488
2489 bool success;
2490 auto allocator = strategy.make_allocator(input.length(), &success);
2491 if (!success) {
2492 return document(input, 1, 1, ERROR_OUT_OF_MEMORY, 0);
2493 }
2494
2495 return parser<typename AllocationStrategy::allocator>(
2496 input,
2497 std::move(allocator)
2498 ).get_document();
2499 }
2500}
2501
2502#pragma GCC diagnostic pop