JsonCpp project page Classes Namespace JsonCpp home page

json_reader.cpp
Go to the documentation of this file.
1// Copyright 2007-2011 Baptiste Lepilleur and The JsonCpp Authors
2// Copyright (C) 2016 InfoTeCS JSC. All rights reserved.
3// Distributed under MIT license, or public domain if desired and
4// recognized in your jurisdiction.
5// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
6
7#if !defined(JSON_IS_AMALGAMATION)
8#include "json_tool.h"
9#include <json/assertions.h>
10#include <json/reader.h>
11#include <json/value.h>
12#endif // if !defined(JSON_IS_AMALGAMATION)
#include <cassert>
#include <cstring>
#include <iostream>
#include <istream>
#include <iterator>
#include <limits>
#include <memory>
#include <set>
#include <sstream>
#include <utility>
22
23#include <cstdio>
24#if __cplusplus >= 201103L
25
26#if !defined(sscanf)
27#define sscanf std::sscanf
28#endif
29
30#endif //__cplusplus
31
32#if defined(_MSC_VER)
33#if !defined(_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES)
34#define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
35#endif //_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
36#endif //_MSC_VER
37
38#if defined(_MSC_VER)
39// Disable warning about strdup being deprecated.
40#pragma warning(disable : 4996)
41#endif
42
// Nesting-depth limit for the deprecated Reader. Define
// JSONCPP_DEPRECATED_STACK_LIMIT as an integer at compile time to override
// the default of 1000.
#ifndef JSONCPP_DEPRECATED_STACK_LIMIT
#define JSONCPP_DEPRECATED_STACK_LIMIT 1000
#endif

// Compared against the node-stack depth in Reader::readValue().
static const size_t stackLimit_g = JSONCPP_DEPRECATED_STACK_LIMIT;
51
52namespace Json {
53
54#if __cplusplus >= 201103L || (defined(_CPPLIB_VER) && _CPPLIB_VER >= 520)
55using CharReaderPtr = std::unique_ptr<CharReader>;
56#else
57typedef std::auto_ptr<CharReader> CharReaderPtr;
58#endif
59
60// Implementation of class Features
61// ////////////////////////////////
62
63Features::Features() = default;
64
65Features Features::all() { return {}; }
66
68 Features features;
69 features.allowComments_ = false;
70 features.strictRoot_ = true;
71 features.allowDroppedNullPlaceholders_ = false;
72 features.allowNumericKeys_ = false;
73 return features;
74}
75
76// Implementation of class Reader
77// ////////////////////////////////
78
79bool Reader::containsNewLine(Reader::Location begin, Reader::Location end) {
80 for (; begin < end; ++begin)
81 if (*begin == '\n' || *begin == '\r')
82 return true;
83 return false;
84}
85
86// Class Reader
87// //////////////////////////////////////////////////////////////////
88
89Reader::Reader() : features_(Features::all()) {}
90
91Reader::Reader(const Features& features) : features_(features) {}
92
93bool Reader::parse(const std::string& document, Value& root,
94 bool collectComments) {
95 document_.assign(document.begin(), document.end());
96 const char* begin = document_.c_str();
97 const char* end = begin + document_.length();
98 return parse(begin, end, root, collectComments);
99}
100
101bool Reader::parse(std::istream& is, Value& root, bool collectComments) {
102 // std::istream_iterator<char> begin(is);
103 // std::istream_iterator<char> end;
104 // Those would allow streamed input from a file, if parse() were a
105 // template function.
106
107 // Since String is reference-counted, this at least does not
108 // create an extra copy.
109 String doc;
110 std::getline(is, doc, static_cast<char> EOF);
111 return parse(doc.data(), doc.data() + doc.size(), root, collectComments);
112}
113
114bool Reader::parse(const char* beginDoc, const char* endDoc, Value& root,
115 bool collectComments) {
116 if (!features_.allowComments_) {
117 collectComments = false;
118 }
119
120 begin_ = beginDoc;
121 end_ = endDoc;
122 collectComments_ = collectComments;
123 current_ = begin_;
124 lastValueEnd_ = nullptr;
125 lastValue_ = nullptr;
126 commentsBefore_.clear();
127 errors_.clear();
128 while (!nodes_.empty())
129 nodes_.pop();
130 nodes_.push(&root);
131
132 bool successful = readValue();
133 Token token;
134 skipCommentTokens(token);
135 if (collectComments_ && !commentsBefore_.empty())
136 root.setComment(commentsBefore_, commentAfter);
137 if (features_.strictRoot_) {
138 if (!root.isArray() && !root.isObject()) {
139 // Set error location to start of doc, ideally should be first token found
140 // in doc
141 token.type_ = tokenError;
142 token.start_ = beginDoc;
143 token.end_ = endDoc;
144 addError(
145 "A valid JSON document must be either an array or an object value.",
146 token);
147 return false;
148 }
149 }
150 return successful;
151}
152
153bool Reader::readValue() {
154 // readValue() may call itself only if it calls readObject() or ReadArray().
155 // These methods execute nodes_.push() just before and nodes_.pop)() just
156 // after calling readValue(). parse() executes one nodes_.push(), so > instead
157 // of >=.
158 if (nodes_.size() > stackLimit_g)
159 throwRuntimeError("Exceeded stackLimit in readValue().");
160
161 Token token;
162 skipCommentTokens(token);
163 bool successful = true;
164
165 if (collectComments_ && !commentsBefore_.empty()) {
166 currentValue().setComment(commentsBefore_, commentBefore);
167 commentsBefore_.clear();
168 }
169
170 switch (token.type_) {
171 case tokenObjectBegin:
172 successful = readObject(token);
173 currentValue().setOffsetLimit(current_ - begin_);
174 break;
175 case tokenArrayBegin:
176 successful = readArray(token);
177 currentValue().setOffsetLimit(current_ - begin_);
178 break;
179 case tokenNumber:
180 successful = decodeNumber(token);
181 break;
182 case tokenString:
183 successful = decodeString(token);
184 break;
185 case tokenTrue: {
186 Value v(true);
187 currentValue().swapPayload(v);
188 currentValue().setOffsetStart(token.start_ - begin_);
189 currentValue().setOffsetLimit(token.end_ - begin_);
190 } break;
191 case tokenFalse: {
192 Value v(false);
193 currentValue().swapPayload(v);
194 currentValue().setOffsetStart(token.start_ - begin_);
195 currentValue().setOffsetLimit(token.end_ - begin_);
196 } break;
197 case tokenNull: {
198 Value v;
199 currentValue().swapPayload(v);
200 currentValue().setOffsetStart(token.start_ - begin_);
201 currentValue().setOffsetLimit(token.end_ - begin_);
202 } break;
203 case tokenArraySeparator:
204 case tokenObjectEnd:
205 case tokenArrayEnd:
206 if (features_.allowDroppedNullPlaceholders_) {
207 // "Un-read" the current token and mark the current value as a null
208 // token.
209 current_--;
210 Value v;
211 currentValue().swapPayload(v);
212 currentValue().setOffsetStart(current_ - begin_ - 1);
213 currentValue().setOffsetLimit(current_ - begin_);
214 break;
215 } // Else, fall through...
216 default:
217 currentValue().setOffsetStart(token.start_ - begin_);
218 currentValue().setOffsetLimit(token.end_ - begin_);
219 return addError("Syntax error: value, object or array expected.", token);
220 }
221
222 if (collectComments_) {
223 lastValueEnd_ = current_;
224 lastValue_ = &currentValue();
225 }
226
227 return successful;
228}
229
230void Reader::skipCommentTokens(Token& token) {
231 if (features_.allowComments_) {
232 do {
233 readToken(token);
234 } while (token.type_ == tokenComment);
235 } else {
236 readToken(token);
237 }
238}
239
240bool Reader::readToken(Token& token) {
241 skipSpaces();
242 token.start_ = current_;
243 Char c = getNextChar();
244 bool ok = true;
245 switch (c) {
246 case '{':
247 token.type_ = tokenObjectBegin;
248 break;
249 case '}':
250 token.type_ = tokenObjectEnd;
251 break;
252 case '[':
253 token.type_ = tokenArrayBegin;
254 break;
255 case ']':
256 token.type_ = tokenArrayEnd;
257 break;
258 case '"':
259 token.type_ = tokenString;
260 ok = readString();
261 break;
262 case '/':
263 token.type_ = tokenComment;
264 ok = readComment();
265 break;
266 case '0':
267 case '1':
268 case '2':
269 case '3':
270 case '4':
271 case '5':
272 case '6':
273 case '7':
274 case '8':
275 case '9':
276 case '-':
277 token.type_ = tokenNumber;
278 readNumber();
279 break;
280 case 't':
281 token.type_ = tokenTrue;
282 ok = match("rue", 3);
283 break;
284 case 'f':
285 token.type_ = tokenFalse;
286 ok = match("alse", 4);
287 break;
288 case 'n':
289 token.type_ = tokenNull;
290 ok = match("ull", 3);
291 break;
292 case ',':
293 token.type_ = tokenArraySeparator;
294 break;
295 case ':':
296 token.type_ = tokenMemberSeparator;
297 break;
298 case 0:
299 token.type_ = tokenEndOfStream;
300 break;
301 default:
302 ok = false;
303 break;
304 }
305 if (!ok)
306 token.type_ = tokenError;
307 token.end_ = current_;
308 return ok;
309}
310
311void Reader::skipSpaces() {
312 while (current_ != end_) {
313 Char c = *current_;
314 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
315 ++current_;
316 else
317 break;
318 }
319}
320
321bool Reader::match(const Char* pattern, int patternLength) {
322 if (end_ - current_ < patternLength)
323 return false;
324 int index = patternLength;
325 while (index--)
326 if (current_[index] != pattern[index])
327 return false;
328 current_ += patternLength;
329 return true;
330}
331
332bool Reader::readComment() {
333 Location commentBegin = current_ - 1;
334 Char c = getNextChar();
335 bool successful = false;
336 if (c == '*')
337 successful = readCStyleComment();
338 else if (c == '/')
339 successful = readCppStyleComment();
340 if (!successful)
341 return false;
342
343 if (collectComments_) {
345 if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
346 if (c != '*' || !containsNewLine(commentBegin, current_))
347 placement = commentAfterOnSameLine;
348 }
349
350 addComment(commentBegin, current_, placement);
351 }
352 return true;
353}
354
355String Reader::normalizeEOL(Reader::Location begin, Reader::Location end) {
356 String normalized;
357 normalized.reserve(static_cast<size_t>(end - begin));
358 Reader::Location current = begin;
359 while (current != end) {
360 char c = *current++;
361 if (c == '\r') {
362 if (current != end && *current == '\n')
363 // convert dos EOL
364 ++current;
365 // convert Mac EOL
366 normalized += '\n';
367 } else {
368 normalized += c;
369 }
370 }
371 return normalized;
372}
373
374void Reader::addComment(Location begin, Location end,
375 CommentPlacement placement) {
376 assert(collectComments_);
377 const String& normalized = normalizeEOL(begin, end);
378 if (placement == commentAfterOnSameLine) {
379 assert(lastValue_ != nullptr);
380 lastValue_->setComment(normalized, placement);
381 } else {
382 commentsBefore_ += normalized;
383 }
384}
385
386bool Reader::readCStyleComment() {
387 while ((current_ + 1) < end_) {
388 Char c = getNextChar();
389 if (c == '*' && *current_ == '/')
390 break;
391 }
392 return getNextChar() == '/';
393}
394
395bool Reader::readCppStyleComment() {
396 while (current_ != end_) {
397 Char c = getNextChar();
398 if (c == '\n')
399 break;
400 if (c == '\r') {
401 // Consume DOS EOL. It will be normalized in addComment.
402 if (current_ != end_ && *current_ == '\n')
403 getNextChar();
404 // Break on Moc OS 9 EOL.
405 break;
406 }
407 }
408 return true;
409}
410
411void Reader::readNumber() {
412 Location p = current_;
413 char c = '0'; // stopgap for already consumed character
414 // integral part
415 while (c >= '0' && c <= '9')
416 c = (current_ = p) < end_ ? *p++ : '\0';
417 // fractional part
418 if (c == '.') {
419 c = (current_ = p) < end_ ? *p++ : '\0';
420 while (c >= '0' && c <= '9')
421 c = (current_ = p) < end_ ? *p++ : '\0';
422 }
423 // exponential part
424 if (c == 'e' || c == 'E') {
425 c = (current_ = p) < end_ ? *p++ : '\0';
426 if (c == '+' || c == '-')
427 c = (current_ = p) < end_ ? *p++ : '\0';
428 while (c >= '0' && c <= '9')
429 c = (current_ = p) < end_ ? *p++ : '\0';
430 }
431}
432
433bool Reader::readString() {
434 Char c = '\0';
435 while (current_ != end_) {
436 c = getNextChar();
437 if (c == '\\')
438 getNextChar();
439 else if (c == '"')
440 break;
441 }
442 return c == '"';
443}
444
445bool Reader::readObject(Token& token) {
446 Token tokenName;
447 String name;
448 Value init(objectValue);
449 currentValue().swapPayload(init);
450 currentValue().setOffsetStart(token.start_ - begin_);
451 while (readToken(tokenName)) {
452 bool initialTokenOk = true;
453 while (tokenName.type_ == tokenComment && initialTokenOk)
454 initialTokenOk = readToken(tokenName);
455 if (!initialTokenOk)
456 break;
457 if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
458 return true;
459 name.clear();
460 if (tokenName.type_ == tokenString) {
461 if (!decodeString(tokenName, name))
462 return recoverFromError(tokenObjectEnd);
463 } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
464 Value numberName;
465 if (!decodeNumber(tokenName, numberName))
466 return recoverFromError(tokenObjectEnd);
467 name = String(numberName.asCString());
468 } else {
469 break;
470 }
471
472 Token colon;
473 if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
474 return addErrorAndRecover("Missing ':' after object member name", colon,
475 tokenObjectEnd);
476 }
477 Value& value = currentValue()[name];
478 nodes_.push(&value);
479 bool ok = readValue();
480 nodes_.pop();
481 if (!ok) // error already set
482 return recoverFromError(tokenObjectEnd);
483
484 Token comma;
485 if (!readToken(comma) ||
486 (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
487 comma.type_ != tokenComment)) {
488 return addErrorAndRecover("Missing ',' or '}' in object declaration",
489 comma, tokenObjectEnd);
490 }
491 bool finalizeTokenOk = true;
492 while (comma.type_ == tokenComment && finalizeTokenOk)
493 finalizeTokenOk = readToken(comma);
494 if (comma.type_ == tokenObjectEnd)
495 return true;
496 }
497 return addErrorAndRecover("Missing '}' or object member name", tokenName,
498 tokenObjectEnd);
499}
500
501bool Reader::readArray(Token& token) {
502 Value init(arrayValue);
503 currentValue().swapPayload(init);
504 currentValue().setOffsetStart(token.start_ - begin_);
505 skipSpaces();
506 if (current_ != end_ && *current_ == ']') // empty array
507 {
508 Token endArray;
509 readToken(endArray);
510 return true;
511 }
512 int index = 0;
513 for (;;) {
514 Value& value = currentValue()[index++];
515 nodes_.push(&value);
516 bool ok = readValue();
517 nodes_.pop();
518 if (!ok) // error already set
519 return recoverFromError(tokenArrayEnd);
520
521 Token currentToken;
522 // Accept Comment after last item in the array.
523 ok = readToken(currentToken);
524 while (currentToken.type_ == tokenComment && ok) {
525 ok = readToken(currentToken);
526 }
527 bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
528 currentToken.type_ != tokenArrayEnd);
529 if (!ok || badTokenType) {
530 return addErrorAndRecover("Missing ',' or ']' in array declaration",
531 currentToken, tokenArrayEnd);
532 }
533 if (currentToken.type_ == tokenArrayEnd)
534 break;
535 }
536 return true;
537}
538
539bool Reader::decodeNumber(Token& token) {
540 Value decoded;
541 if (!decodeNumber(token, decoded))
542 return false;
543 currentValue().swapPayload(decoded);
544 currentValue().setOffsetStart(token.start_ - begin_);
545 currentValue().setOffsetLimit(token.end_ - begin_);
546 return true;
547}
548
549bool Reader::decodeNumber(Token& token, Value& decoded) {
550 // Attempts to parse the number as an integer. If the number is
551 // larger than the maximum supported value of an integer then
552 // we decode the number as a double.
553 Location current = token.start_;
554 bool isNegative = *current == '-';
555 if (isNegative)
556 ++current;
557 // TODO: Help the compiler do the div and mod at compile time or get rid of
558 // them.
559 Value::LargestUInt maxIntegerValue =
562 Value::LargestUInt threshold = maxIntegerValue / 10;
563 Value::LargestUInt value = 0;
564 while (current < token.end_) {
565 Char c = *current++;
566 if (c < '0' || c > '9')
567 return decodeDouble(token, decoded);
568 auto digit(static_cast<Value::UInt>(c - '0'));
569 if (value >= threshold) {
570 // We've hit or exceeded the max value divided by 10 (rounded down). If
571 // a) we've only just touched the limit, b) this is the last digit, and
572 // c) it's small enough to fit in that rounding delta, we're okay.
573 // Otherwise treat this number as a double to avoid overflow.
574 if (value > threshold || current != token.end_ ||
575 digit > maxIntegerValue % 10) {
576 return decodeDouble(token, decoded);
577 }
578 }
579 value = value * 10 + digit;
580 }
581 if (isNegative && value == maxIntegerValue)
582 decoded = Value::minLargestInt;
583 else if (isNegative)
584 decoded = -Value::LargestInt(value);
585 else if (value <= Value::LargestUInt(Value::maxInt))
586 decoded = Value::LargestInt(value);
587 else
588 decoded = value;
589 return true;
590}
591
592bool Reader::decodeDouble(Token& token) {
593 Value decoded;
594 if (!decodeDouble(token, decoded))
595 return false;
596 currentValue().swapPayload(decoded);
597 currentValue().setOffsetStart(token.start_ - begin_);
598 currentValue().setOffsetLimit(token.end_ - begin_);
599 return true;
600}
601
602bool Reader::decodeDouble(Token& token, Value& decoded) {
603 double value = 0;
604 String buffer(token.start_, token.end_);
605 IStringStream is(buffer);
606 if (!(is >> value))
607 return addError(
608 "'" + String(token.start_, token.end_) + "' is not a number.", token);
609 decoded = value;
610 return true;
611}
612
613bool Reader::decodeString(Token& token) {
614 String decoded_string;
615 if (!decodeString(token, decoded_string))
616 return false;
617 Value decoded(decoded_string);
618 currentValue().swapPayload(decoded);
619 currentValue().setOffsetStart(token.start_ - begin_);
620 currentValue().setOffsetLimit(token.end_ - begin_);
621 return true;
622}
623
624bool Reader::decodeString(Token& token, String& decoded) {
625 decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
626 Location current = token.start_ + 1; // skip '"'
627 Location end = token.end_ - 1; // do not include '"'
628 while (current != end) {
629 Char c = *current++;
630 if (c == '"')
631 break;
632 else if (c == '\\') {
633 if (current == end)
634 return addError("Empty escape sequence in string", token, current);
635 Char escape = *current++;
636 switch (escape) {
637 case '"':
638 decoded += '"';
639 break;
640 case '/':
641 decoded += '/';
642 break;
643 case '\\':
644 decoded += '\\';
645 break;
646 case 'b':
647 decoded += '\b';
648 break;
649 case 'f':
650 decoded += '\f';
651 break;
652 case 'n':
653 decoded += '\n';
654 break;
655 case 'r':
656 decoded += '\r';
657 break;
658 case 't':
659 decoded += '\t';
660 break;
661 case 'u': {
662 unsigned int unicode;
663 if (!decodeUnicodeCodePoint(token, current, end, unicode))
664 return false;
665 decoded += codePointToUTF8(unicode);
666 } break;
667 default:
668 return addError("Bad escape sequence in string", token, current);
669 }
670 } else {
671 decoded += c;
672 }
673 }
674 return true;
675}
676
677bool Reader::decodeUnicodeCodePoint(Token& token, Location& current,
678 Location end, unsigned int& unicode) {
679
680 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
681 return false;
682 if (unicode >= 0xD800 && unicode <= 0xDBFF) {
683 // surrogate pairs
684 if (end - current < 6)
685 return addError(
686 "additional six characters expected to parse unicode surrogate pair.",
687 token, current);
688 if (*(current++) == '\\' && *(current++) == 'u') {
689 unsigned int surrogatePair;
690 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
691 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
692 } else
693 return false;
694 } else
695 return addError("expecting another \\u token to begin the second half of "
696 "a unicode surrogate pair",
697 token, current);
698 }
699 return true;
700}
701
702bool Reader::decodeUnicodeEscapeSequence(Token& token, Location& current,
703 Location end,
704 unsigned int& ret_unicode) {
705 if (end - current < 4)
706 return addError(
707 "Bad unicode escape sequence in string: four digits expected.", token,
708 current);
709 int unicode = 0;
710 for (int index = 0; index < 4; ++index) {
711 Char c = *current++;
712 unicode *= 16;
713 if (c >= '0' && c <= '9')
714 unicode += c - '0';
715 else if (c >= 'a' && c <= 'f')
716 unicode += c - 'a' + 10;
717 else if (c >= 'A' && c <= 'F')
718 unicode += c - 'A' + 10;
719 else
720 return addError(
721 "Bad unicode escape sequence in string: hexadecimal digit expected.",
722 token, current);
723 }
724 ret_unicode = static_cast<unsigned int>(unicode);
725 return true;
726}
727
728bool Reader::addError(const String& message, Token& token, Location extra) {
729 ErrorInfo info;
730 info.token_ = token;
731 info.message_ = message;
732 info.extra_ = extra;
733 errors_.push_back(info);
734 return false;
735}
736
737bool Reader::recoverFromError(TokenType skipUntilToken) {
738 size_t const errorCount = errors_.size();
739 Token skip;
740 for (;;) {
741 if (!readToken(skip))
742 errors_.resize(errorCount); // discard errors caused by recovery
743 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
744 break;
745 }
746 errors_.resize(errorCount);
747 return false;
748}
749
750bool Reader::addErrorAndRecover(const String& message, Token& token,
751 TokenType skipUntilToken) {
752 addError(message, token);
753 return recoverFromError(skipUntilToken);
754}
755
756Value& Reader::currentValue() { return *(nodes_.top()); }
757
758Reader::Char Reader::getNextChar() {
759 if (current_ == end_)
760 return 0;
761 return *current_++;
762}
763
764void Reader::getLocationLineAndColumn(Location location, int& line,
765 int& column) const {
766 Location current = begin_;
767 Location lastLineStart = current;
768 line = 0;
769 while (current < location && current != end_) {
770 Char c = *current++;
771 if (c == '\r') {
772 if (*current == '\n')
773 ++current;
774 lastLineStart = current;
775 ++line;
776 } else if (c == '\n') {
777 lastLineStart = current;
778 ++line;
779 }
780 }
781 // column & line start at 1
782 column = int(location - lastLineStart) + 1;
783 ++line;
784}
785
786String Reader::getLocationLineAndColumn(Location location) const {
787 int line, column;
788 getLocationLineAndColumn(location, line, column);
789 char buffer[18 + 16 + 16 + 1];
790 jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
791 return buffer;
792}
793
794// Deprecated. Preserved for backward compatibility
795String Reader::getFormatedErrorMessages() const {
796 return getFormattedErrorMessages();
797}
798
799String Reader::getFormattedErrorMessages() const {
800 String formattedMessage;
801 for (const auto& error : errors_) {
802 formattedMessage +=
803 "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
804 formattedMessage += " " + error.message_ + "\n";
805 if (error.extra_)
806 formattedMessage +=
807 "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
808 }
809 return formattedMessage;
810}
811
812std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
813 std::vector<Reader::StructuredError> allErrors;
814 for (const auto& error : errors_) {
815 Reader::StructuredError structured;
816 structured.offset_start = error.token_.start_ - begin_;
817 structured.offset_limit = error.token_.end_ - begin_;
818 structured.message = error.message_;
819 allErrors.push_back(structured);
820 }
821 return allErrors;
822}
823
824bool Reader::pushError(const Value& value, const String& message) {
825 ptrdiff_t const length = end_ - begin_;
826 if (value.getOffsetStart() > length || value.getOffsetLimit() > length)
827 return false;
828 Token token;
829 token.type_ = tokenError;
830 token.start_ = begin_ + value.getOffsetStart();
831 token.end_ = begin_ + value.getOffsetLimit();
832 ErrorInfo info;
833 info.token_ = token;
834 info.message_ = message;
835 info.extra_ = nullptr;
836 errors_.push_back(info);
837 return true;
838}
839
840bool Reader::pushError(const Value& value, const String& message,
841 const Value& extra) {
842 ptrdiff_t const length = end_ - begin_;
843 if (value.getOffsetStart() > length || value.getOffsetLimit() > length ||
844 extra.getOffsetLimit() > length)
845 return false;
846 Token token;
847 token.type_ = tokenError;
848 token.start_ = begin_ + value.getOffsetStart();
849 token.end_ = begin_ + value.getOffsetLimit();
850 ErrorInfo info;
851 info.token_ = token;
852 info.message_ = message;
853 info.extra_ = begin_ + extra.getOffsetStart();
854 errors_.push_back(info);
855 return true;
856}
857
858bool Reader::good() const { return errors_.empty(); }
859
// Feature switches used internally by OurReader. Originally copied from the
// (now deprecated) Features class.
struct OurFeatures {
  // Returns an instance with every member value-initialized.
  static OurFeatures all();

  bool allowComments_;
  bool strictRoot_;
  bool allowDroppedNullPlaceholders_;
  bool allowNumericKeys_;
  bool allowSingleQuotes_;
  bool failIfExtra_;
  bool rejectDupKeys_;
  bool allowSpecialFloats_;
  size_t stackLimit_;
}; // OurFeatures

OurFeatures OurFeatures::all() { return OurFeatures{}; }
877
// Implementation of class OurReader
// ////////////////////////////////
880
881// Originally copied from the Reader class (now deprecated), used internally
882// for implementing JSON reading.
883class OurReader {
884public:
885 using Char = char;
886 using Location = const Char*;
887 struct StructuredError {
888 ptrdiff_t offset_start;
889 ptrdiff_t offset_limit;
890 String message;
891 };
892
893 explicit OurReader(OurFeatures const& features);
894 bool parse(const char* beginDoc, const char* endDoc, Value& root,
895 bool collectComments = true);
896 String getFormattedErrorMessages() const;
897 std::vector<StructuredError> getStructuredErrors() const;
898
899private:
900 OurReader(OurReader const&); // no impl
901 void operator=(OurReader const&); // no impl
902
903 enum TokenType {
904 tokenEndOfStream = 0,
905 tokenObjectBegin,
906 tokenObjectEnd,
907 tokenArrayBegin,
908 tokenArrayEnd,
909 tokenString,
910 tokenNumber,
911 tokenTrue,
912 tokenFalse,
913 tokenNull,
914 tokenNaN,
915 tokenPosInf,
916 tokenNegInf,
917 tokenArraySeparator,
918 tokenMemberSeparator,
919 tokenComment,
920 tokenError
921 };
922
923 class Token {
924 public:
925 TokenType type_;
926 Location start_;
927 Location end_;
928 };
929
930 class ErrorInfo {
931 public:
932 Token token_;
933 String message_;
934 Location extra_;
935 };
936
937 using Errors = std::deque<ErrorInfo>;
938
939 bool readToken(Token& token);
940 void skipSpaces();
941 bool match(const Char* pattern, int patternLength);
942 bool readComment();
943 bool readCStyleComment(bool* containsNewLineResult);
944 bool readCppStyleComment();
945 bool readString();
946 bool readStringSingleQuote();
947 bool readNumber(bool checkInf);
948 bool readValue();
949 bool readObject(Token& token);
950 bool readArray(Token& token);
951 bool decodeNumber(Token& token);
952 bool decodeNumber(Token& token, Value& decoded);
953 bool decodeString(Token& token);
954 bool decodeString(Token& token, String& decoded);
955 bool decodeDouble(Token& token);
956 bool decodeDouble(Token& token, Value& decoded);
957 bool decodeUnicodeCodePoint(Token& token, Location& current, Location end,
958 unsigned int& unicode);
959 bool decodeUnicodeEscapeSequence(Token& token, Location& current,
960 Location end, unsigned int& unicode);
961 bool addError(const String& message, Token& token, Location extra = nullptr);
962 bool recoverFromError(TokenType skipUntilToken);
963 bool addErrorAndRecover(const String& message, Token& token,
964 TokenType skipUntilToken);
965 void skipUntilSpace();
966 Value& currentValue();
967 Char getNextChar();
968 void getLocationLineAndColumn(Location location, int& line,
969 int& column) const;
970 String getLocationLineAndColumn(Location location) const;
971 void addComment(Location begin, Location end, CommentPlacement placement);
972 void skipCommentTokens(Token& token);
973
974 static String normalizeEOL(Location begin, Location end);
975 static bool containsNewLine(Location begin, Location end);
976
977 using Nodes = std::stack<Value*>;
978
979 Nodes nodes_{};
980 Errors errors_{};
981 String document_{};
982 Location begin_ = nullptr;
983 Location end_ = nullptr;
984 Location current_ = nullptr;
985 Location lastValueEnd_ = nullptr;
986 Value* lastValue_ = nullptr;
987 bool lastValueHasAComment_ = false;
988 String commentsBefore_{};
989
990 OurFeatures const features_;
991 bool collectComments_ = false;
992}; // OurReader
993
// Complete copy of the Reader implementation, adapted for OurReader.
995
996bool OurReader::containsNewLine(OurReader::Location begin,
997 OurReader::Location end) {
998 for (; begin < end; ++begin)
999 if (*begin == '\n' || *begin == '\r')
1000 return true;
1001 return false;
1002}
1003
1004OurReader::OurReader(OurFeatures const& features) : features_(features) {}
1005
1006bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root,
1007 bool collectComments) {
1008 if (!features_.allowComments_) {
1009 collectComments = false;
1010 }
1011
1012 begin_ = beginDoc;
1013 end_ = endDoc;
1014 collectComments_ = collectComments;
1015 current_ = begin_;
1016 lastValueEnd_ = nullptr;
1017 lastValue_ = nullptr;
1018 commentsBefore_.clear();
1019 errors_.clear();
1020 while (!nodes_.empty())
1021 nodes_.pop();
1022 nodes_.push(&root);
1023
1024 bool successful = readValue();
1025 nodes_.pop();
1026 Token token;
1027 skipCommentTokens(token);
1028 if (features_.failIfExtra_ && (token.type_ != tokenEndOfStream)) {
1029 addError("Extra non-whitespace after JSON value.", token);
1030 return false;
1031 }
1032 if (collectComments_ && !commentsBefore_.empty())
1033 root.setComment(commentsBefore_, commentAfter);
1034 if (features_.strictRoot_) {
1035 if (!root.isArray() && !root.isObject()) {
1036 // Set error location to start of doc, ideally should be first token found
1037 // in doc
1038 token.type_ = tokenError;
1039 token.start_ = beginDoc;
1040 token.end_ = endDoc;
1041 addError(
1042 "A valid JSON document must be either an array or an object value.",
1043 token);
1044 return false;
1045 }
1046 }
1047 return successful;
1048}
1049
1050bool OurReader::readValue() {
1051 // To preserve the old behaviour we cast size_t to int.
1052 if (nodes_.size() > features_.stackLimit_)
1053 throwRuntimeError("Exceeded stackLimit in readValue().");
1054 Token token;
1055 skipCommentTokens(token);
1056 bool successful = true;
1057
1058 if (collectComments_ && !commentsBefore_.empty()) {
1059 currentValue().setComment(commentsBefore_, commentBefore);
1060 commentsBefore_.clear();
1061 }
1062
1063 switch (token.type_) {
1064 case tokenObjectBegin:
1065 successful = readObject(token);
1066 currentValue().setOffsetLimit(current_ - begin_);
1067 break;
1068 case tokenArrayBegin:
1069 successful = readArray(token);
1070 currentValue().setOffsetLimit(current_ - begin_);
1071 break;
1072 case tokenNumber:
1073 successful = decodeNumber(token);
1074 break;
1075 case tokenString:
1076 successful = decodeString(token);
1077 break;
1078 case tokenTrue: {
1079 Value v(true);
1080 currentValue().swapPayload(v);
1081 currentValue().setOffsetStart(token.start_ - begin_);
1082 currentValue().setOffsetLimit(token.end_ - begin_);
1083 } break;
1084 case tokenFalse: {
1085 Value v(false);
1086 currentValue().swapPayload(v);
1087 currentValue().setOffsetStart(token.start_ - begin_);
1088 currentValue().setOffsetLimit(token.end_ - begin_);
1089 } break;
1090 case tokenNull: {
1091 Value v;
1092 currentValue().swapPayload(v);
1093 currentValue().setOffsetStart(token.start_ - begin_);
1094 currentValue().setOffsetLimit(token.end_ - begin_);
1095 } break;
1096 case tokenNaN: {
1097 Value v(std::numeric_limits<double>::quiet_NaN());
1098 currentValue().swapPayload(v);
1099 currentValue().setOffsetStart(token.start_ - begin_);
1100 currentValue().setOffsetLimit(token.end_ - begin_);
1101 } break;
1102 case tokenPosInf: {
1103 Value v(std::numeric_limits<double>::infinity());
1104 currentValue().swapPayload(v);
1105 currentValue().setOffsetStart(token.start_ - begin_);
1106 currentValue().setOffsetLimit(token.end_ - begin_);
1107 } break;
1108 case tokenNegInf: {
1109 Value v(-std::numeric_limits<double>::infinity());
1110 currentValue().swapPayload(v);
1111 currentValue().setOffsetStart(token.start_ - begin_);
1112 currentValue().setOffsetLimit(token.end_ - begin_);
1113 } break;
1114 case tokenArraySeparator:
1115 case tokenObjectEnd:
1116 case tokenArrayEnd:
1117 if (features_.allowDroppedNullPlaceholders_) {
1118 // "Un-read" the current token and mark the current value as a null
1119 // token.
1120 current_--;
1121 Value v;
1122 currentValue().swapPayload(v);
1123 currentValue().setOffsetStart(current_ - begin_ - 1);
1124 currentValue().setOffsetLimit(current_ - begin_);
1125 break;
1126 } // else, fall through ...
1127 default:
1128 currentValue().setOffsetStart(token.start_ - begin_);
1129 currentValue().setOffsetLimit(token.end_ - begin_);
1130 return addError("Syntax error: value, object or array expected.", token);
1131 }
1132
1133 if (collectComments_) {
1134 lastValueEnd_ = current_;
1135 lastValueHasAComment_ = false;
1136 lastValue_ = &currentValue();
1137 }
1138
1139 return successful;
1140}
1141
1142void OurReader::skipCommentTokens(Token& token) {
1143 if (features_.allowComments_) {
1144 do {
1145 readToken(token);
1146 } while (token.type_ == tokenComment);
1147 } else {
1148 readToken(token);
1149 }
1150}
1151
1152bool OurReader::readToken(Token& token) {
1153 skipSpaces();
1154 token.start_ = current_;
1155 Char c = getNextChar();
1156 bool ok = true;
1157 switch (c) {
1158 case '{':
1159 token.type_ = tokenObjectBegin;
1160 break;
1161 case '}':
1162 token.type_ = tokenObjectEnd;
1163 break;
1164 case '[':
1165 token.type_ = tokenArrayBegin;
1166 break;
1167 case ']':
1168 token.type_ = tokenArrayEnd;
1169 break;
1170 case '"':
1171 token.type_ = tokenString;
1172 ok = readString();
1173 break;
1174 case '\'':
1175 if (features_.allowSingleQuotes_) {
1176 token.type_ = tokenString;
1177 ok = readStringSingleQuote();
1178 break;
1179 } // else fall through
1180 case '/':
1181 token.type_ = tokenComment;
1182 ok = readComment();
1183 break;
1184 case '0':
1185 case '1':
1186 case '2':
1187 case '3':
1188 case '4':
1189 case '5':
1190 case '6':
1191 case '7':
1192 case '8':
1193 case '9':
1194 token.type_ = tokenNumber;
1195 readNumber(false);
1196 break;
1197 case '-':
1198 if (readNumber(true)) {
1199 token.type_ = tokenNumber;
1200 } else {
1201 token.type_ = tokenNegInf;
1202 ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1203 }
1204 break;
1205 case '+':
1206 if (readNumber(true)) {
1207 token.type_ = tokenNumber;
1208 } else {
1209 token.type_ = tokenPosInf;
1210 ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1211 }
1212 break;
1213 case 't':
1214 token.type_ = tokenTrue;
1215 ok = match("rue", 3);
1216 break;
1217 case 'f':
1218 token.type_ = tokenFalse;
1219 ok = match("alse", 4);
1220 break;
1221 case 'n':
1222 token.type_ = tokenNull;
1223 ok = match("ull", 3);
1224 break;
1225 case 'N':
1226 if (features_.allowSpecialFloats_) {
1227 token.type_ = tokenNaN;
1228 ok = match("aN", 2);
1229 } else {
1230 ok = false;
1231 }
1232 break;
1233 case 'I':
1234 if (features_.allowSpecialFloats_) {
1235 token.type_ = tokenPosInf;
1236 ok = match("nfinity", 7);
1237 } else {
1238 ok = false;
1239 }
1240 break;
1241 case ',':
1242 token.type_ = tokenArraySeparator;
1243 break;
1244 case ':':
1245 token.type_ = tokenMemberSeparator;
1246 break;
1247 case 0:
1248 token.type_ = tokenEndOfStream;
1249 break;
1250 default:
1251 ok = false;
1252 break;
1253 }
1254 if (!ok)
1255 token.type_ = tokenError;
1256 token.end_ = current_;
1257 return ok;
1258}
1259
1260void OurReader::skipSpaces() {
1261 while (current_ != end_) {
1262 Char c = *current_;
1263 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
1264 ++current_;
1265 else
1266 break;
1267 }
1268}
1269
1270bool OurReader::match(const Char* pattern, int patternLength) {
1271 if (end_ - current_ < patternLength)
1272 return false;
1273 int index = patternLength;
1274 while (index--)
1275 if (current_[index] != pattern[index])
1276 return false;
1277 current_ += patternLength;
1278 return true;
1279}
1280
1281bool OurReader::readComment() {
1282 const Location commentBegin = current_ - 1;
1283 const Char c = getNextChar();
1284 bool successful = false;
1285 bool cStyleWithEmbeddedNewline = false;
1286
1287 const bool isCStyleComment = (c == '*');
1288 const bool isCppStyleComment = (c == '/');
1289 if (isCStyleComment) {
1290 successful = readCStyleComment(&cStyleWithEmbeddedNewline);
1291 } else if (isCppStyleComment) {
1292 successful = readCppStyleComment();
1293 }
1294
1295 if (!successful)
1296 return false;
1297
1298 if (collectComments_) {
1299 CommentPlacement placement = commentBefore;
1300
1301 if (!lastValueHasAComment_) {
1302 if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1303 if (isCppStyleComment || !cStyleWithEmbeddedNewline) {
1304 placement = commentAfterOnSameLine;
1305 lastValueHasAComment_ = true;
1306 }
1307 }
1308 }
1309
1310 addComment(commentBegin, current_, placement);
1311 }
1312 return true;
1313}
1314
1315String OurReader::normalizeEOL(OurReader::Location begin,
1316 OurReader::Location end) {
1317 String normalized;
1318 normalized.reserve(static_cast<size_t>(end - begin));
1319 OurReader::Location current = begin;
1320 while (current != end) {
1321 char c = *current++;
1322 if (c == '\r') {
1323 if (current != end && *current == '\n')
1324 // convert dos EOL
1325 ++current;
1326 // convert Mac EOL
1327 normalized += '\n';
1328 } else {
1329 normalized += c;
1330 }
1331 }
1332 return normalized;
1333}
1334
1335void OurReader::addComment(Location begin, Location end,
1336 CommentPlacement placement) {
1337 assert(collectComments_);
1338 const String& normalized = normalizeEOL(begin, end);
1339 if (placement == commentAfterOnSameLine) {
1340 assert(lastValue_ != nullptr);
1341 lastValue_->setComment(normalized, placement);
1342 } else {
1343 commentsBefore_ += normalized;
1344 }
1345}
1346
1347bool OurReader::readCStyleComment(bool* containsNewLineResult) {
1348 *containsNewLineResult = false;
1349
1350 while ((current_ + 1) < end_) {
1351 Char c = getNextChar();
1352 if (c == '*' && *current_ == '/') {
1353 break;
1354 } else if (c == '\n') {
1355 *containsNewLineResult = true;
1356 }
1357 }
1358
1359 return getNextChar() == '/';
1360}
1361
1362bool OurReader::readCppStyleComment() {
1363 while (current_ != end_) {
1364 Char c = getNextChar();
1365 if (c == '\n')
1366 break;
1367 if (c == '\r') {
1368 // Consume DOS EOL. It will be normalized in addComment.
1369 if (current_ != end_ && *current_ == '\n')
1370 getNextChar();
1371 // Break on Moc OS 9 EOL.
1372 break;
1373 }
1374 }
1375 return true;
1376}
1377
1378bool OurReader::readNumber(bool checkInf) {
1379 Location p = current_;
1380 if (checkInf && p != end_ && *p == 'I') {
1381 current_ = ++p;
1382 return false;
1383 }
1384 char c = '0'; // stopgap for already consumed character
1385 // integral part
1386 while (c >= '0' && c <= '9')
1387 c = (current_ = p) < end_ ? *p++ : '\0';
1388 // fractional part
1389 if (c == '.') {
1390 c = (current_ = p) < end_ ? *p++ : '\0';
1391 while (c >= '0' && c <= '9')
1392 c = (current_ = p) < end_ ? *p++ : '\0';
1393 }
1394 // exponential part
1395 if (c == 'e' || c == 'E') {
1396 c = (current_ = p) < end_ ? *p++ : '\0';
1397 if (c == '+' || c == '-')
1398 c = (current_ = p) < end_ ? *p++ : '\0';
1399 while (c >= '0' && c <= '9')
1400 c = (current_ = p) < end_ ? *p++ : '\0';
1401 }
1402 return true;
1403}
1404bool OurReader::readString() {
1405 Char c = 0;
1406 while (current_ != end_) {
1407 c = getNextChar();
1408 if (c == '\\')
1409 getNextChar();
1410 else if (c == '"')
1411 break;
1412 }
1413 return c == '"';
1414}
1415
1416bool OurReader::readStringSingleQuote() {
1417 Char c = 0;
1418 while (current_ != end_) {
1419 c = getNextChar();
1420 if (c == '\\')
1421 getNextChar();
1422 else if (c == '\'')
1423 break;
1424 }
1425 return c == '\'';
1426}
1427
1428bool OurReader::readObject(Token& token) {
1429 Token tokenName;
1430 String name;
1431 Value init(objectValue);
1432 currentValue().swapPayload(init);
1433 currentValue().setOffsetStart(token.start_ - begin_);
1434 while (readToken(tokenName)) {
1435 bool initialTokenOk = true;
1436 while (tokenName.type_ == tokenComment && initialTokenOk)
1437 initialTokenOk = readToken(tokenName);
1438 if (!initialTokenOk)
1439 break;
1440 if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
1441 return true;
1442 name.clear();
1443 if (tokenName.type_ == tokenString) {
1444 if (!decodeString(tokenName, name))
1445 return recoverFromError(tokenObjectEnd);
1446 } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
1447 Value numberName;
1448 if (!decodeNumber(tokenName, numberName))
1449 return recoverFromError(tokenObjectEnd);
1450 name = numberName.asString();
1451 } else {
1452 break;
1453 }
1454 if (name.length() >= (1U << 30))
1455 throwRuntimeError("keylength >= 2^30");
1456 if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
1457 String msg = "Duplicate key: '" + name + "'";
1458 return addErrorAndRecover(msg, tokenName, tokenObjectEnd);
1459 }
1460
1461 Token colon;
1462 if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
1463 return addErrorAndRecover("Missing ':' after object member name", colon,
1464 tokenObjectEnd);
1465 }
1466 Value& value = currentValue()[name];
1467 nodes_.push(&value);
1468 bool ok = readValue();
1469 nodes_.pop();
1470 if (!ok) // error already set
1471 return recoverFromError(tokenObjectEnd);
1472
1473 Token comma;
1474 if (!readToken(comma) ||
1475 (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
1476 comma.type_ != tokenComment)) {
1477 return addErrorAndRecover("Missing ',' or '}' in object declaration",
1478 comma, tokenObjectEnd);
1479 }
1480 bool finalizeTokenOk = true;
1481 while (comma.type_ == tokenComment && finalizeTokenOk)
1482 finalizeTokenOk = readToken(comma);
1483 if (comma.type_ == tokenObjectEnd)
1484 return true;
1485 }
1486 return addErrorAndRecover("Missing '}' or object member name", tokenName,
1487 tokenObjectEnd);
1488}
1489
1490bool OurReader::readArray(Token& token) {
1491 Value init(arrayValue);
1492 currentValue().swapPayload(init);
1493 currentValue().setOffsetStart(token.start_ - begin_);
1494 skipSpaces();
1495 if (current_ != end_ && *current_ == ']') // empty array
1496 {
1497 Token endArray;
1498 readToken(endArray);
1499 return true;
1500 }
1501 int index = 0;
1502 for (;;) {
1503 Value& value = currentValue()[index++];
1504 nodes_.push(&value);
1505 bool ok = readValue();
1506 nodes_.pop();
1507 if (!ok) // error already set
1508 return recoverFromError(tokenArrayEnd);
1509
1510 Token currentToken;
1511 // Accept Comment after last item in the array.
1512 ok = readToken(currentToken);
1513 while (currentToken.type_ == tokenComment && ok) {
1514 ok = readToken(currentToken);
1515 }
1516 bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
1517 currentToken.type_ != tokenArrayEnd);
1518 if (!ok || badTokenType) {
1519 return addErrorAndRecover("Missing ',' or ']' in array declaration",
1520 currentToken, tokenArrayEnd);
1521 }
1522 if (currentToken.type_ == tokenArrayEnd)
1523 break;
1524 }
1525 return true;
1526}
1527
1528bool OurReader::decodeNumber(Token& token) {
1529 Value decoded;
1530 if (!decodeNumber(token, decoded))
1531 return false;
1532 currentValue().swapPayload(decoded);
1533 currentValue().setOffsetStart(token.start_ - begin_);
1534 currentValue().setOffsetLimit(token.end_ - begin_);
1535 return true;
1536}
1537
1538bool OurReader::decodeNumber(Token& token, Value& decoded) {
1539 // Attempts to parse the number as an integer. If the number is
1540 // larger than the maximum supported value of an integer then
1541 // we decode the number as a double.
1542 Location current = token.start_;
1543 const bool isNegative = *current == '-';
1544 if (isNegative) {
1545 ++current;
1546 }
1547
1548 // We assume we can represent the largest and smallest integer types as
1549 // unsigned integers with separate sign. This is only true if they can fit
1550 // into an unsigned integer.
1552 "Int must be smaller than UInt");
1553
1554 // We need to convert minLargestInt into a positive number. The easiest way
1555 // to do this conversion is to assume our "threshold" value of minLargestInt
1556 // divided by 10 can fit in maxLargestInt when absolute valued. This should
1557 // be a safe assumption.
1559 "The absolute value of minLargestInt must be greater than or "
1560 "equal to maxLargestInt");
1561 static_assert(Value::minLargestInt / 10 >= -Value::maxLargestInt,
1562 "The absolute value of minLargestInt must be only 1 magnitude "
1563 "larger than maxLargest Int");
1564
1565 static constexpr Value::LargestUInt positive_threshold =
1567 static constexpr Value::UInt positive_last_digit = Value::maxLargestUInt % 10;
1568
1569 // For the negative values, we have to be more careful. Since typically
1570 // -Value::minLargestInt will cause an overflow, we first divide by 10 and
1571 // then take the inverse. This assumes that minLargestInt is only a single
1572 // power of 10 different in magnitude, which we check above. For the last
1573 // digit, we take the modulus before negating for the same reason.
1574 static constexpr Value::LargestUInt negative_threshold =
1576 static constexpr Value::UInt negative_last_digit =
1578
1579 const Value::LargestUInt threshold =
1580 isNegative ? negative_threshold : positive_threshold;
1581 const Value::UInt max_last_digit =
1582 isNegative ? negative_last_digit : positive_last_digit;
1583
1584 Value::LargestUInt value = 0;
1585 while (current < token.end_) {
1586 Char c = *current++;
1587 if (c < '0' || c > '9')
1588 return decodeDouble(token, decoded);
1589
1590 const Value::UInt digit(static_cast<Value::UInt>(c - '0'));
1591 if (value >= threshold) {
1592 // We've hit or exceeded the max value divided by 10 (rounded down). If
1593 // a) we've only just touched the limit, meaing value == threshold,
1594 // b) this is the last digit, or
1595 // c) it's small enough to fit in that rounding delta, we're okay.
1596 // Otherwise treat this number as a double to avoid overflow.
1597 if (value > threshold || current != token.end_ ||
1598 digit > max_last_digit) {
1599 return decodeDouble(token, decoded);
1600 }
1601 }
1602 value = value * 10 + digit;
1603 }
1604
1605 if (isNegative) {
1606 // We use the same magnitude assumption here, just in case.
1607 const Value::UInt last_digit = static_cast<Value::UInt>(value % 10);
1608 decoded = -Value::LargestInt(value / 10) * 10 - last_digit;
1609 } else if (value <= Value::LargestUInt(Value::maxLargestInt)) {
1610 decoded = Value::LargestInt(value);
1611 } else {
1612 decoded = value;
1613 }
1614
1615 return true;
1616}
1617
1618bool OurReader::decodeDouble(Token& token) {
1619 Value decoded;
1620 if (!decodeDouble(token, decoded))
1621 return false;
1622 currentValue().swapPayload(decoded);
1623 currentValue().setOffsetStart(token.start_ - begin_);
1624 currentValue().setOffsetLimit(token.end_ - begin_);
1625 return true;
1626}
1627
1628bool OurReader::decodeDouble(Token& token, Value& decoded) {
1629 double value = 0;
1630 const String buffer(token.start_, token.end_);
1631 IStringStream is(buffer);
1632 if (!(is >> value)) {
1633 return addError(
1634 "'" + String(token.start_, token.end_) + "' is not a number.", token);
1635 }
1636 decoded = value;
1637 return true;
1638}
1639
1640bool OurReader::decodeString(Token& token) {
1641 String decoded_string;
1642 if (!decodeString(token, decoded_string))
1643 return false;
1644 Value decoded(decoded_string);
1645 currentValue().swapPayload(decoded);
1646 currentValue().setOffsetStart(token.start_ - begin_);
1647 currentValue().setOffsetLimit(token.end_ - begin_);
1648 return true;
1649}
1650
1651bool OurReader::decodeString(Token& token, String& decoded) {
1652 decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
1653 Location current = token.start_ + 1; // skip '"'
1654 Location end = token.end_ - 1; // do not include '"'
1655 while (current != end) {
1656 Char c = *current++;
1657 if (c == '"') {
1658 break;
1659 } else if (c == '\\') {
1660 if (current == end)
1661 return addError("Empty escape sequence in string", token, current);
1662 Char escape = *current++;
1663 switch (escape) {
1664 case '"':
1665 decoded += '"';
1666 break;
1667 case '/':
1668 decoded += '/';
1669 break;
1670 case '\\':
1671 decoded += '\\';
1672 break;
1673 case 'b':
1674 decoded += '\b';
1675 break;
1676 case 'f':
1677 decoded += '\f';
1678 break;
1679 case 'n':
1680 decoded += '\n';
1681 break;
1682 case 'r':
1683 decoded += '\r';
1684 break;
1685 case 't':
1686 decoded += '\t';
1687 break;
1688 case 'u': {
1689 unsigned int unicode;
1690 if (!decodeUnicodeCodePoint(token, current, end, unicode))
1691 return false;
1692 decoded += codePointToUTF8(unicode);
1693 } break;
1694 default:
1695 return addError("Bad escape sequence in string", token, current);
1696 }
1697 } else {
1698 decoded += c;
1699 }
1700 }
1701 return true;
1702}
1703
1704bool OurReader::decodeUnicodeCodePoint(Token& token, Location& current,
1705 Location end, unsigned int& unicode) {
1706
1707 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
1708 return false;
1709 if (unicode >= 0xD800 && unicode <= 0xDBFF) {
1710 // surrogate pairs
1711 if (end - current < 6)
1712 return addError(
1713 "additional six characters expected to parse unicode surrogate pair.",
1714 token, current);
1715 if (*(current++) == '\\' && *(current++) == 'u') {
1716 unsigned int surrogatePair;
1717 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
1718 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
1719 } else
1720 return false;
1721 } else
1722 return addError("expecting another \\u token to begin the second half of "
1723 "a unicode surrogate pair",
1724 token, current);
1725 }
1726 return true;
1727}
1728
1729bool OurReader::decodeUnicodeEscapeSequence(Token& token, Location& current,
1730 Location end,
1731 unsigned int& ret_unicode) {
1732 if (end - current < 4)
1733 return addError(
1734 "Bad unicode escape sequence in string: four digits expected.", token,
1735 current);
1736 int unicode = 0;
1737 for (int index = 0; index < 4; ++index) {
1738 Char c = *current++;
1739 unicode *= 16;
1740 if (c >= '0' && c <= '9')
1741 unicode += c - '0';
1742 else if (c >= 'a' && c <= 'f')
1743 unicode += c - 'a' + 10;
1744 else if (c >= 'A' && c <= 'F')
1745 unicode += c - 'A' + 10;
1746 else
1747 return addError(
1748 "Bad unicode escape sequence in string: hexadecimal digit expected.",
1749 token, current);
1750 }
1751 ret_unicode = static_cast<unsigned int>(unicode);
1752 return true;
1753}
1754
1755bool OurReader::addError(const String& message, Token& token, Location extra) {
1756 ErrorInfo info;
1757 info.token_ = token;
1758 info.message_ = message;
1759 info.extra_ = extra;
1760 errors_.push_back(info);
1761 return false;
1762}
1763
1764bool OurReader::recoverFromError(TokenType skipUntilToken) {
1765 size_t errorCount = errors_.size();
1766 Token skip;
1767 for (;;) {
1768 if (!readToken(skip))
1769 errors_.resize(errorCount); // discard errors caused by recovery
1770 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
1771 break;
1772 }
1773 errors_.resize(errorCount);
1774 return false;
1775}
1776
1777bool OurReader::addErrorAndRecover(const String& message, Token& token,
1778 TokenType skipUntilToken) {
1779 addError(message, token);
1780 return recoverFromError(skipUntilToken);
1781}
1782
1783Value& OurReader::currentValue() { return *(nodes_.top()); }
1784
1785OurReader::Char OurReader::getNextChar() {
1786 if (current_ == end_)
1787 return 0;
1788 return *current_++;
1789}
1790
1791void OurReader::getLocationLineAndColumn(Location location, int& line,
1792 int& column) const {
1793 Location current = begin_;
1794 Location lastLineStart = current;
1795 line = 0;
1796 while (current < location && current != end_) {
1797 Char c = *current++;
1798 if (c == '\r') {
1799 if (*current == '\n')
1800 ++current;
1801 lastLineStart = current;
1802 ++line;
1803 } else if (c == '\n') {
1804 lastLineStart = current;
1805 ++line;
1806 }
1807 }
1808 // column & line start at 1
1809 column = int(location - lastLineStart) + 1;
1810 ++line;
1811}
1812
1813String OurReader::getLocationLineAndColumn(Location location) const {
1814 int line, column;
1815 getLocationLineAndColumn(location, line, column);
1816 char buffer[18 + 16 + 16 + 1];
1817 jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1818 return buffer;
1819}
1820
1821String OurReader::getFormattedErrorMessages() const {
1822 String formattedMessage;
1823 for (const auto& error : errors_) {
1824 formattedMessage +=
1825 "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
1826 formattedMessage += " " + error.message_ + "\n";
1827 if (error.extra_)
1828 formattedMessage +=
1829 "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
1830 }
1831 return formattedMessage;
1832}
1833
1834std::vector<OurReader::StructuredError> OurReader::getStructuredErrors() const {
1835 std::vector<OurReader::StructuredError> allErrors;
1836 for (const auto& error : errors_) {
1837 OurReader::StructuredError structured;
1838 structured.offset_start = error.token_.start_ - begin_;
1839 structured.offset_limit = error.token_.end_ - begin_;
1840 structured.message = error.message_;
1841 allErrors.push_back(structured);
1842 }
1843 return allErrors;
1844}
1845
1846class OurCharReader : public CharReader {
1847 bool const collectComments_;
1848 OurReader reader_;
1849
1850public:
1851 OurCharReader(bool collectComments, OurFeatures const& features)
1852 : collectComments_(collectComments), reader_(features) {}
1853 bool parse(char const* beginDoc, char const* endDoc, Value* root,
1854 String* errs) override {
1855 bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
1856 if (errs) {
1857 *errs = reader_.getFormattedErrorMessages();
1858 }
1859 return ok;
1860 }
1861};
1862
1866 bool collectComments = settings_["collectComments"].asBool();
1867 OurFeatures features = OurFeatures::all();
1868 features.allowComments_ = settings_["allowComments"].asBool();
1869 features.strictRoot_ = settings_["strictRoot"].asBool();
1870 features.allowDroppedNullPlaceholders_ =
1871 settings_["allowDroppedNullPlaceholders"].asBool();
1872 features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
1873 features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
1874
1875 // Stack limit is always a size_t, so we get this as an unsigned int
1876 // regardless of it we have 64-bit integer support enabled.
1877 features.stackLimit_ = static_cast<size_t>(settings_["stackLimit"].asUInt());
1878 features.failIfExtra_ = settings_["failIfExtra"].asBool();
1879 features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
1880 features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool();
1881 return new OurCharReader(collectComments, features);
1882}
1883static void getValidReaderKeys(std::set<String>* valid_keys) {
1884 valid_keys->clear();
1885 valid_keys->insert("collectComments");
1886 valid_keys->insert("allowComments");
1887 valid_keys->insert("strictRoot");
1888 valid_keys->insert("allowDroppedNullPlaceholders");
1889 valid_keys->insert("allowNumericKeys");
1890 valid_keys->insert("allowSingleQuotes");
1891 valid_keys->insert("stackLimit");
1892 valid_keys->insert("failIfExtra");
1893 valid_keys->insert("rejectDupKeys");
1894 valid_keys->insert("allowSpecialFloats");
1895}
1897 Json::Value my_invalid;
1898 if (!invalid)
1899 invalid = &my_invalid; // so we do not need to test for NULL
1900 Json::Value& inv = *invalid;
1901 std::set<String> valid_keys;
1902 getValidReaderKeys(&valid_keys);
1904 size_t n = keys.size();
1905 for (size_t i = 0; i < n; ++i) {
1906 String const& key = keys[i];
1907 if (valid_keys.find(key) == valid_keys.end()) {
1908 inv[key] = settings_[key];
1909 }
1910 }
1911 return inv.empty();
1912}
1914 return settings_[key];
1915}
1916// static
1919 (*settings)["allowComments"] = false;
1920 (*settings)["strictRoot"] = true;
1921 (*settings)["allowDroppedNullPlaceholders"] = false;
1922 (*settings)["allowNumericKeys"] = false;
1923 (*settings)["allowSingleQuotes"] = false;
1924 (*settings)["stackLimit"] = 1000;
1925 (*settings)["failIfExtra"] = true;
1926 (*settings)["rejectDupKeys"] = true;
1927 (*settings)["allowSpecialFloats"] = false;
1929}
1930// static
1933 (*settings)["collectComments"] = true;
1934 (*settings)["allowComments"] = true;
1935 (*settings)["strictRoot"] = false;
1936 (*settings)["allowDroppedNullPlaceholders"] = false;
1937 (*settings)["allowNumericKeys"] = false;
1938 (*settings)["allowSingleQuotes"] = false;
1939 (*settings)["stackLimit"] = 1000;
1940 (*settings)["failIfExtra"] = false;
1941 (*settings)["rejectDupKeys"] = false;
1942 (*settings)["allowSpecialFloats"] = false;
1944}
1945
1947// global functions
1948
1949bool parseFromStream(CharReader::Factory const& fact, IStream& sin, Value* root,
1950 String* errs) {
1951 OStringStream ssin;
1952 ssin << sin.rdbuf();
1953 String doc = ssin.str();
1954 char const* begin = doc.data();
1955 char const* end = begin + doc.size();
1956 // Note that we do not actually need a null-terminator.
1957 CharReaderPtr const reader(fact.newCharReader());
1958 return reader->parse(begin, end, root, errs);
1959}
1960
1963 String errs;
1964 bool ok = parseFromStream(b, sin, &root, &errs);
1965 if (!ok) {
1966 throwRuntimeError(errs);
1967 }
1968 return sin;
1969}
1970
1971} // namespace Json
virtual CharReader * newCharReader() const =0
Allocate a CharReader via operator new().
Build a CharReader implementation.
Definition reader.h:289
static void setDefaults(Json::Value *settings)
Called by ctor, but you can use this to reset settings_.
Value & operator[](const String &key)
A simple way to update a specific setting.
CharReader * newCharReader() const override
Allocate a CharReader via operator new().
static void strictMode(Json::Value *settings)
Same as old Features::strictMode().
Json::Value settings_
Configuration of this builder.
Definition reader.h:330
~CharReaderBuilder() override
bool validate(Json::Value *invalid) const
Interface for reading JSON from a char array.
Definition reader.h:245
Configuration passed to reader and writer.
bool strictRoot_
true if root must be either an array or an object value.
bool allowComments_
true if comments are allowed. Default: true.
bool allowDroppedNullPlaceholders_
true if dropped null placeholders are allowed. Default: false.
static Features all()
A configuration that allows all features and assumes all strings are UTF-8.
Features()
Initialize the configuration like JsonConfig::allFeatures;.
static Features strictMode()
A configuration that is strictly compatible with the JSON specification.
bool allowNumericKeys_
true if numeric object keys are allowed. Default: false.
Represents a JSON value.
Definition value.h:188
bool empty() const
Return true if empty array, empty object, or null; otherwise, false.
static constexpr LargestInt maxLargestInt
Maximum signed integer value that can be stored in a Json::Value.
Definition value.h:221
Json::UInt UInt
Definition value.h:195
ArrayIndex size() const
Number of values in array or object.
Json::LargestInt LargestInt
Definition value.h:201
std::vector< String > Members
Definition value.h:192
Json::LargestUInt LargestUInt
Definition value.h:202
UInt asUInt() const
Members getMemberNames() const
Return a list of the member names.
static constexpr Int maxInt
Maximum signed int value that can be stored in a Json::Value.
Definition value.h:228
bool asBool() const
static constexpr LargestUInt maxLargestUInt
Maximum unsigned integer value that can be stored in a Json::Value.
Definition value.h:223
static constexpr LargestInt minLargestInt
Minimum signed integer value that can be stored in a Json::Value.
Definition value.h:218
#define jsoncpp_snprintf
Definition config.h:79
#define JSONCPP_DEPRECATED_STACK_LIMIT
static size_t const stackLimit_g
JSON (JavaScript Object Notation).
Definition allocator.h:14
std::basic_istringstream< String::value_type, String::traits_type, String::allocator_type > IStringStream
Definition config.h:165
std::basic_ostringstream< String::value_type, String::traits_type, String::allocator_type > OStringStream
Definition config.h:168
std::basic_string< char, std::char_traits< char >, Allocator< char > > String
Definition config.h:162
static void getValidReaderKeys(std::set< String > *valid_keys)
std::auto_ptr< CharReader > CharReaderPtr
CommentPlacement
Definition value.h:108
@ commentAfterOnSameLine
a comment just after a value on the same line
Definition value.h:110
@ commentBefore
a comment placed on the line before a value
Definition value.h:109
@ commentAfter
a comment on the line after a value (only makes sense for the root value)
Definition value.h:111
@ arrayValue
array value (ordered list)
Definition value.h:104
@ objectValue
object value (collection of name/value pairs).
Definition value.h:105
static String codePointToUTF8(unsigned int cp)
Converts a unicode code-point to UTF-8.
Definition json_tool.h:39
IStream & operator>>(IStream &, Value &)
Read from 'sin' into 'root'.
bool parseFromStream(CharReader::Factory const &, IStream &, Value *root, String *errs)
Consume entire stream and use its begin/end.
std::istream IStream
Definition config.h:169