JsonCpp project page Classes Namespace JsonCpp home page

json_reader.cpp
Go to the documentation of this file.
1 // Copyright 2007-2011 Baptiste Lepilleur and The JsonCpp Authors
2 // Copyright (C) 2016 InfoTeCS JSC. All rights reserved.
3 // Distributed under MIT license, or public domain if desired and
4 // recognized in your jurisdiction.
5 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
6 
7 #if !defined(JSON_IS_AMALGAMATION)
8 #include "json_tool.h"
9 #include <json/assertions.h>
10 #include <json/reader.h>
11 #include <json/value.h>
12 #endif // if !defined(JSON_IS_AMALGAMATION)
13 #include <algorithm>
14 #include <cassert>
15 #include <cstring>
16 #include <iostream>
17 #include <istream>
18 #include <limits>
19 #include <memory>
20 #include <set>
21 #include <sstream>
22 #include <utility>
23 
24 #include <cstdio>
25 #if __cplusplus >= 201103L
26 
27 #if !defined(sscanf)
28 #define sscanf std::sscanf
29 #endif
30 
31 #endif //__cplusplus
32 
33 #if defined(_MSC_VER)
34 #if !defined(_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES)
35 #define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
36 #endif //_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
37 #endif //_MSC_VER
38 
39 #if defined(_MSC_VER)
40 // Disable warning about strdup being deprecated.
41 #pragma warning(disable : 4996)
42 #endif
43 
44 // Define JSONCPP_DEPRECATED_STACK_LIMIT as an appropriate integer at compile
45 // time to change the stack limit
46 #if !defined(JSONCPP_DEPRECATED_STACK_LIMIT)
47 #define JSONCPP_DEPRECATED_STACK_LIMIT 1000
48 #endif
49 
50 static size_t const stackLimit_g =
51  JSONCPP_DEPRECATED_STACK_LIMIT; // see readValue()
52 
53 namespace Json {
54 
55 #if __cplusplus >= 201103L || (defined(_CPPLIB_VER) && _CPPLIB_VER >= 520)
56 using CharReaderPtr = std::unique_ptr<CharReader>;
57 #else
58 using CharReaderPtr = std::auto_ptr<CharReader>;
59 #endif
60 
61 // Implementation of class Features
62 // ////////////////////////////////
63 
// Compiler-generated default constructor for Features.
Features::Features() = default;
65 
66 Features Features::all() { return {}; }
67 
69  Features features;
70  features.allowComments_ = false;
71  features.strictRoot_ = true;
72  features.allowDroppedNullPlaceholders_ = false;
73  features.allowNumericKeys_ = false;
74  return features;
75 }
76 
77 // Implementation of class Reader
78 // ////////////////////////////////
79 
80 bool Reader::containsNewLine(Reader::Location begin, Reader::Location end) {
81  return std::any_of(begin, end, [](char b) { return b == '\n' || b == '\r'; });
82 }
83 
84 // Class Reader
85 // //////////////////////////////////////////////////////////////////
86 
// Constructs a reader configured with Features::all().
Reader::Reader() : features_(Features::all()) {}
88 
// Constructs a reader that uses a copy of the supplied feature set.
Reader::Reader(const Features& features) : features_(features) {}
90 
91 bool Reader::parse(const std::string& document, Value& root,
92  bool collectComments) {
93  document_.assign(document.begin(), document.end());
94  const char* begin = document_.c_str();
95  const char* end = begin + document_.length();
96  return parse(begin, end, root, collectComments);
97 }
98 
99 bool Reader::parse(std::istream& is, Value& root, bool collectComments) {
100  // std::istream_iterator<char> begin(is);
101  // std::istream_iterator<char> end;
102  // Those would allow streamed input from a file, if parse() were a
103  // template function.
104 
105  // Since String is reference-counted, this at least does not
106  // create an extra copy.
107  String doc(std::istreambuf_iterator<char>(is), {});
108  return parse(doc.data(), doc.data() + doc.size(), root, collectComments);
109 }
110 
111 bool Reader::parse(const char* beginDoc, const char* endDoc, Value& root,
112  bool collectComments) {
113  if (!features_.allowComments_) {
114  collectComments = false;
115  }
116 
117  begin_ = beginDoc;
118  end_ = endDoc;
119  collectComments_ = collectComments;
120  current_ = begin_;
121  lastValueEnd_ = nullptr;
122  lastValue_ = nullptr;
123  commentsBefore_.clear();
124  errors_.clear();
125  while (!nodes_.empty())
126  nodes_.pop();
127  nodes_.push(&root);
128 
129  bool successful = readValue();
130  Token token;
131  skipCommentTokens(token);
132  if (collectComments_ && !commentsBefore_.empty())
133  root.setComment(commentsBefore_, commentAfter);
134  if (features_.strictRoot_) {
135  if (!root.isArray() && !root.isObject()) {
136  // Set error location to start of doc, ideally should be first token found
137  // in doc
138  token.type_ = tokenError;
139  token.start_ = beginDoc;
140  token.end_ = endDoc;
141  addError(
142  "A valid JSON document must be either an array or an object value.",
143  token);
144  return false;
145  }
146  }
147  return successful;
148 }
149 
150 bool Reader::readValue() {
151  // readValue() may call itself only if it calls readObject() or ReadArray().
152  // These methods execute nodes_.push() just before and nodes_.pop)() just
153  // after calling readValue(). parse() executes one nodes_.push(), so > instead
154  // of >=.
155  if (nodes_.size() > stackLimit_g)
156  throwRuntimeError("Exceeded stackLimit in readValue().");
157 
158  Token token;
159  skipCommentTokens(token);
160  bool successful = true;
161 
162  if (collectComments_ && !commentsBefore_.empty()) {
163  currentValue().setComment(commentsBefore_, commentBefore);
164  commentsBefore_.clear();
165  }
166 
167  switch (token.type_) {
168  case tokenObjectBegin:
169  successful = readObject(token);
170  currentValue().setOffsetLimit(current_ - begin_);
171  break;
172  case tokenArrayBegin:
173  successful = readArray(token);
174  currentValue().setOffsetLimit(current_ - begin_);
175  break;
176  case tokenNumber:
177  successful = decodeNumber(token);
178  break;
179  case tokenString:
180  successful = decodeString(token);
181  break;
182  case tokenTrue: {
183  Value v(true);
184  currentValue().swapPayload(v);
185  currentValue().setOffsetStart(token.start_ - begin_);
186  currentValue().setOffsetLimit(token.end_ - begin_);
187  } break;
188  case tokenFalse: {
189  Value v(false);
190  currentValue().swapPayload(v);
191  currentValue().setOffsetStart(token.start_ - begin_);
192  currentValue().setOffsetLimit(token.end_ - begin_);
193  } break;
194  case tokenNull: {
195  Value v;
196  currentValue().swapPayload(v);
197  currentValue().setOffsetStart(token.start_ - begin_);
198  currentValue().setOffsetLimit(token.end_ - begin_);
199  } break;
200  case tokenArraySeparator:
201  case tokenObjectEnd:
202  case tokenArrayEnd:
203  if (features_.allowDroppedNullPlaceholders_) {
204  // "Un-read" the current token and mark the current value as a null
205  // token.
206  current_--;
207  Value v;
208  currentValue().swapPayload(v);
209  currentValue().setOffsetStart(current_ - begin_ - 1);
210  currentValue().setOffsetLimit(current_ - begin_);
211  break;
212  } // Else, fall through...
213  default:
214  currentValue().setOffsetStart(token.start_ - begin_);
215  currentValue().setOffsetLimit(token.end_ - begin_);
216  return addError("Syntax error: value, object or array expected.", token);
217  }
218 
219  if (collectComments_) {
220  lastValueEnd_ = current_;
221  lastValue_ = &currentValue();
222  }
223 
224  return successful;
225 }
226 
227 void Reader::skipCommentTokens(Token& token) {
228  if (features_.allowComments_) {
229  do {
230  readToken(token);
231  } while (token.type_ == tokenComment);
232  } else {
233  readToken(token);
234  }
235 }
236 
237 bool Reader::readToken(Token& token) {
238  skipSpaces();
239  token.start_ = current_;
240  Char c = getNextChar();
241  bool ok = true;
242  switch (c) {
243  case '{':
244  token.type_ = tokenObjectBegin;
245  break;
246  case '}':
247  token.type_ = tokenObjectEnd;
248  break;
249  case '[':
250  token.type_ = tokenArrayBegin;
251  break;
252  case ']':
253  token.type_ = tokenArrayEnd;
254  break;
255  case '"':
256  token.type_ = tokenString;
257  ok = readString();
258  break;
259  case '/':
260  token.type_ = tokenComment;
261  ok = readComment();
262  break;
263  case '0':
264  case '1':
265  case '2':
266  case '3':
267  case '4':
268  case '5':
269  case '6':
270  case '7':
271  case '8':
272  case '9':
273  case '-':
274  token.type_ = tokenNumber;
275  readNumber();
276  break;
277  case 't':
278  token.type_ = tokenTrue;
279  ok = match("rue", 3);
280  break;
281  case 'f':
282  token.type_ = tokenFalse;
283  ok = match("alse", 4);
284  break;
285  case 'n':
286  token.type_ = tokenNull;
287  ok = match("ull", 3);
288  break;
289  case ',':
290  token.type_ = tokenArraySeparator;
291  break;
292  case ':':
293  token.type_ = tokenMemberSeparator;
294  break;
295  case 0:
296  token.type_ = tokenEndOfStream;
297  break;
298  default:
299  ok = false;
300  break;
301  }
302  if (!ok)
303  token.type_ = tokenError;
304  token.end_ = current_;
305  return ok;
306 }
307 
308 void Reader::skipSpaces() {
309  while (current_ != end_) {
310  Char c = *current_;
311  if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
312  ++current_;
313  else
314  break;
315  }
316 }
317 
318 bool Reader::match(const Char* pattern, int patternLength) {
319  if (end_ - current_ < patternLength)
320  return false;
321  int index = patternLength;
322  while (index--)
323  if (current_[index] != pattern[index])
324  return false;
325  current_ += patternLength;
326  return true;
327 }
328 
329 bool Reader::readComment() {
330  Location commentBegin = current_ - 1;
331  Char c = getNextChar();
332  bool successful = false;
333  if (c == '*')
334  successful = readCStyleComment();
335  else if (c == '/')
336  successful = readCppStyleComment();
337  if (!successful)
338  return false;
339 
340  if (collectComments_) {
341  CommentPlacement placement = commentBefore;
342  if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
343  if (c != '*' || !containsNewLine(commentBegin, current_))
344  placement = commentAfterOnSameLine;
345  }
346 
347  addComment(commentBegin, current_, placement);
348  }
349  return true;
350 }
351 
352 String Reader::normalizeEOL(Reader::Location begin, Reader::Location end) {
353  String normalized;
354  normalized.reserve(static_cast<size_t>(end - begin));
355  Reader::Location current = begin;
356  while (current != end) {
357  char c = *current++;
358  if (c == '\r') {
359  if (current != end && *current == '\n')
360  // convert dos EOL
361  ++current;
362  // convert Mac EOL
363  normalized += '\n';
364  } else {
365  normalized += c;
366  }
367  }
368  return normalized;
369 }
370 
371 void Reader::addComment(Location begin, Location end,
372  CommentPlacement placement) {
373  assert(collectComments_);
374  const String& normalized = normalizeEOL(begin, end);
375  if (placement == commentAfterOnSameLine) {
376  assert(lastValue_ != nullptr);
377  lastValue_->setComment(normalized, placement);
378  } else {
379  commentsBefore_ += normalized;
380  }
381 }
382 
383 bool Reader::readCStyleComment() {
384  while ((current_ + 1) < end_) {
385  Char c = getNextChar();
386  if (c == '*' && *current_ == '/')
387  break;
388  }
389  return getNextChar() == '/';
390 }
391 
392 bool Reader::readCppStyleComment() {
393  while (current_ != end_) {
394  Char c = getNextChar();
395  if (c == '\n')
396  break;
397  if (c == '\r') {
398  // Consume DOS EOL. It will be normalized in addComment.
399  if (current_ != end_ && *current_ == '\n')
400  getNextChar();
401  // Break on Moc OS 9 EOL.
402  break;
403  }
404  }
405  return true;
406 }
407 
408 void Reader::readNumber() {
409  Location p = current_;
410  char c = '0'; // stopgap for already consumed character
411  // integral part
412  while (c >= '0' && c <= '9')
413  c = (current_ = p) < end_ ? *p++ : '\0';
414  // fractional part
415  if (c == '.') {
416  c = (current_ = p) < end_ ? *p++ : '\0';
417  while (c >= '0' && c <= '9')
418  c = (current_ = p) < end_ ? *p++ : '\0';
419  }
420  // exponential part
421  if (c == 'e' || c == 'E') {
422  c = (current_ = p) < end_ ? *p++ : '\0';
423  if (c == '+' || c == '-')
424  c = (current_ = p) < end_ ? *p++ : '\0';
425  while (c >= '0' && c <= '9')
426  c = (current_ = p) < end_ ? *p++ : '\0';
427  }
428 }
429 
430 bool Reader::readString() {
431  Char c = '\0';
432  while (current_ != end_) {
433  c = getNextChar();
434  if (c == '\\')
435  getNextChar();
436  else if (c == '"')
437  break;
438  }
439  return c == '"';
440 }
441 
// Reads the members of a JSON object whose opening '{' is described by
// `token`. The current value is replaced by an empty object and each member is
// parsed into it. Returns true when the closing '}' was reached; on error an
// entry is recorded and input is skipped up to the next '}' or end of stream.
bool Reader::readObject(Token& token) {
  Token tokenName;
  String name;
  Value init(objectValue);
  currentValue().swapPayload(init);
  currentValue().setOffsetStart(token.start_ - begin_);
  while (readToken(tokenName)) {
    // Skip any comments that precede the member name.
    bool initialTokenOk = true;
    while (tokenName.type_ == tokenComment && initialTokenOk)
      initialTokenOk = readToken(tokenName);
    if (!initialTokenOk)
      break;
    // `name` still holds the previous member's name at this point, so a '}'
    // is accepted here only while that name is empty.
    if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
      return true;
    name.clear();
    if (tokenName.type_ == tokenString) {
      if (!decodeString(tokenName, name))
        return recoverFromError(tokenObjectEnd);
    } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
      // Numeric keys are converted to their string form.
      Value numberName;
      if (!decodeNumber(tokenName, numberName))
        return recoverFromError(tokenObjectEnd);
      name = numberName.asString();
    } else {
      break;
    }

    Token colon;
    if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
      return addErrorAndRecover("Missing ':' after object member name", colon,
                                tokenObjectEnd);
    }
    // Parse the member value in place, with the member on top of the node
    // stack for the duration of the nested readValue() call.
    Value& value = currentValue()[name];
    nodes_.push(&value);
    bool ok = readValue();
    nodes_.pop();
    if (!ok) // error already set
      return recoverFromError(tokenObjectEnd);

    Token comma;
    if (!readToken(comma) ||
        (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
         comma.type_ != tokenComment)) {
      return addErrorAndRecover("Missing ',' or '}' in object declaration",
                                comma, tokenObjectEnd);
    }
    // Skip comments that follow the member value.
    bool finalizeTokenOk = true;
    while (comma.type_ == tokenComment && finalizeTokenOk)
      finalizeTokenOk = readToken(comma);
    if (comma.type_ == tokenObjectEnd)
      return true;
  }
  return addErrorAndRecover("Missing '}' or object member name", tokenName,
                            tokenObjectEnd);
}
497 
498 bool Reader::readArray(Token& token) {
499  Value init(arrayValue);
500  currentValue().swapPayload(init);
501  currentValue().setOffsetStart(token.start_ - begin_);
502  skipSpaces();
503  if (current_ != end_ && *current_ == ']') // empty array
504  {
505  Token endArray;
506  readToken(endArray);
507  return true;
508  }
509  int index = 0;
510  for (;;) {
511  Value& value = currentValue()[index++];
512  nodes_.push(&value);
513  bool ok = readValue();
514  nodes_.pop();
515  if (!ok) // error already set
516  return recoverFromError(tokenArrayEnd);
517 
518  Token currentToken;
519  // Accept Comment after last item in the array.
520  ok = readToken(currentToken);
521  while (currentToken.type_ == tokenComment && ok) {
522  ok = readToken(currentToken);
523  }
524  bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
525  currentToken.type_ != tokenArrayEnd);
526  if (!ok || badTokenType) {
527  return addErrorAndRecover("Missing ',' or ']' in array declaration",
528  currentToken, tokenArrayEnd);
529  }
530  if (currentToken.type_ == tokenArrayEnd)
531  break;
532  }
533  return true;
534 }
535 
536 bool Reader::decodeNumber(Token& token) {
537  Value decoded;
538  if (!decodeNumber(token, decoded))
539  return false;
540  currentValue().swapPayload(decoded);
541  currentValue().setOffsetStart(token.start_ - begin_);
542  currentValue().setOffsetLimit(token.end_ - begin_);
543  return true;
544 }
545 
546 bool Reader::decodeNumber(Token& token, Value& decoded) {
547  // Attempts to parse the number as an integer. If the number is
548  // larger than the maximum supported value of an integer then
549  // we decode the number as a double.
550  Location current = token.start_;
551  bool isNegative = *current == '-';
552  if (isNegative)
553  ++current;
554  // TODO: Help the compiler do the div and mod at compile time or get rid of
555  // them.
556  Value::LargestUInt maxIntegerValue =
557  isNegative ? Value::LargestUInt(Value::maxLargestInt) + 1
559  Value::LargestUInt threshold = maxIntegerValue / 10;
560  Value::LargestUInt value = 0;
561  while (current < token.end_) {
562  Char c = *current++;
563  if (c < '0' || c > '9')
564  return decodeDouble(token, decoded);
565  auto digit(static_cast<Value::UInt>(c - '0'));
566  if (value >= threshold) {
567  // We've hit or exceeded the max value divided by 10 (rounded down). If
568  // a) we've only just touched the limit, b) this is the last digit, and
569  // c) it's small enough to fit in that rounding delta, we're okay.
570  // Otherwise treat this number as a double to avoid overflow.
571  if (value > threshold || current != token.end_ ||
572  digit > maxIntegerValue % 10) {
573  return decodeDouble(token, decoded);
574  }
575  }
576  value = value * 10 + digit;
577  }
578  if (isNegative && value == maxIntegerValue)
579  decoded = Value::minLargestInt;
580  else if (isNegative)
581  decoded = -Value::LargestInt(value);
582  else if (value <= Value::LargestUInt(Value::maxInt))
583  decoded = Value::LargestInt(value);
584  else
585  decoded = value;
586  return true;
587 }
588 
589 bool Reader::decodeDouble(Token& token) {
590  Value decoded;
591  if (!decodeDouble(token, decoded))
592  return false;
593  currentValue().swapPayload(decoded);
594  currentValue().setOffsetStart(token.start_ - begin_);
595  currentValue().setOffsetLimit(token.end_ - begin_);
596  return true;
597 }
598 
599 bool Reader::decodeDouble(Token& token, Value& decoded) {
600  double value = 0;
601  String buffer(token.start_, token.end_);
602  IStringStream is(buffer);
603  if (!(is >> value))
604  return addError(
605  "'" + String(token.start_, token.end_) + "' is not a number.", token);
606  decoded = value;
607  return true;
608 }
609 
610 bool Reader::decodeString(Token& token) {
611  String decoded_string;
612  if (!decodeString(token, decoded_string))
613  return false;
614  Value decoded(decoded_string);
615  currentValue().swapPayload(decoded);
616  currentValue().setOffsetStart(token.start_ - begin_);
617  currentValue().setOffsetLimit(token.end_ - begin_);
618  return true;
619 }
620 
// Decodes the contents of a quoted string token into `decoded`, translating
// backslash escape sequences (including \uXXXX, which is appended as UTF-8).
// Returns false, after recording an error, on an empty or unknown escape
// sequence or a malformed unicode escape.
bool Reader::decodeString(Token& token, String& decoded) {
  // The token includes both quote characters, hence the "- 2".
  decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
  Location current = token.start_ + 1; // skip '"'
  Location end = token.end_ - 1;       // do not include '"'
  while (current != end) {
    Char c = *current++;
    if (c == '"')
      break;
    if (c == '\\') {
      // A backslash must be followed by at least one more character.
      if (current == end)
        return addError("Empty escape sequence in string", token, current);
      Char escape = *current++;
      switch (escape) {
      case '"':
        decoded += '"';
        break;
      case '/':
        decoded += '/';
        break;
      case '\\':
        decoded += '\\';
        break;
      case 'b':
        decoded += '\b';
        break;
      case 'f':
        decoded += '\f';
        break;
      case 'n':
        decoded += '\n';
        break;
      case 'r':
        decoded += '\r';
        break;
      case 't':
        decoded += '\t';
        break;
      case 'u': {
        // decodeUnicodeCodePoint() advances `current` past the hex digits
        // (and past a second \uXXXX when a surrogate pair is decoded).
        unsigned int unicode;
        if (!decodeUnicodeCodePoint(token, current, end, unicode))
          return false;
        decoded += codePointToUTF8(unicode);
      } break;
      default:
        return addError("Bad escape sequence in string", token, current);
      }
    } else {
      decoded += c;
    }
  }
  return true;
}
673 
674 bool Reader::decodeUnicodeCodePoint(Token& token, Location& current,
675  Location end, unsigned int& unicode) {
676 
677  if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
678  return false;
679  if (unicode >= 0xD800 && unicode <= 0xDBFF) {
680  // surrogate pairs
681  if (end - current < 6)
682  return addError(
683  "additional six characters expected to parse unicode surrogate pair.",
684  token, current);
685  if (*(current++) == '\\' && *(current++) == 'u') {
686  unsigned int surrogatePair;
687  if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
688  unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
689  } else
690  return false;
691  } else
692  return addError("expecting another \\u token to begin the second half of "
693  "a unicode surrogate pair",
694  token, current);
695  }
696  return true;
697 }
698 
699 bool Reader::decodeUnicodeEscapeSequence(Token& token, Location& current,
700  Location end,
701  unsigned int& ret_unicode) {
702  if (end - current < 4)
703  return addError(
704  "Bad unicode escape sequence in string: four digits expected.", token,
705  current);
706  int unicode = 0;
707  for (int index = 0; index < 4; ++index) {
708  Char c = *current++;
709  unicode *= 16;
710  if (c >= '0' && c <= '9')
711  unicode += c - '0';
712  else if (c >= 'a' && c <= 'f')
713  unicode += c - 'a' + 10;
714  else if (c >= 'A' && c <= 'F')
715  unicode += c - 'A' + 10;
716  else
717  return addError(
718  "Bad unicode escape sequence in string: hexadecimal digit expected.",
719  token, current);
720  }
721  ret_unicode = static_cast<unsigned int>(unicode);
722  return true;
723 }
724 
725 bool Reader::addError(const String& message, Token& token, Location extra) {
726  ErrorInfo info;
727  info.token_ = token;
728  info.message_ = message;
729  info.extra_ = extra;
730  errors_.push_back(info);
731  return false;
732 }
733 
734 bool Reader::recoverFromError(TokenType skipUntilToken) {
735  size_t const errorCount = errors_.size();
736  Token skip;
737  for (;;) {
738  if (!readToken(skip))
739  errors_.resize(errorCount); // discard errors caused by recovery
740  if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
741  break;
742  }
743  errors_.resize(errorCount);
744  return false;
745 }
746 
747 bool Reader::addErrorAndRecover(const String& message, Token& token,
748  TokenType skipUntilToken) {
749  addError(message, token);
750  return recoverFromError(skipUntilToken);
751 }
752 
// Returns the value on top of the node stack (the value being parsed).
// The stack must not be empty.
Value& Reader::currentValue() { return *(nodes_.top()); }
754 
755 Reader::Char Reader::getNextChar() {
756  if (current_ == end_)
757  return 0;
758  return *current_++;
759 }
760 
761 void Reader::getLocationLineAndColumn(Location location, int& line,
762  int& column) const {
763  Location current = begin_;
764  Location lastLineStart = current;
765  line = 0;
766  while (current < location && current != end_) {
767  Char c = *current++;
768  if (c == '\r') {
769  if (*current == '\n')
770  ++current;
771  lastLineStart = current;
772  ++line;
773  } else if (c == '\n') {
774  lastLineStart = current;
775  ++line;
776  }
777  }
778  // column & line start at 1
779  column = int(location - lastLineStart) + 1;
780  ++line;
781 }
782 
783 String Reader::getLocationLineAndColumn(Location location) const {
784  int line, column;
785  getLocationLineAndColumn(location, line, column);
786  char buffer[18 + 16 + 16 + 1];
787  jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
788  return buffer;
789 }
790 
791 // Deprecated. Preserved for backward compatibility
// Misspelled alias kept for backward compatibility; forwards to
// getFormattedErrorMessages().
String Reader::getFormatedErrorMessages() const {
  return getFormattedErrorMessages();
}
795 
797  String formattedMessage;
798  for (const auto& error : errors_) {
799  formattedMessage +=
800  "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
801  formattedMessage += " " + error.message_ + "\n";
802  if (error.extra_)
803  formattedMessage +=
804  "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
805  }
806  return formattedMessage;
807 }
808 
809 std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
810  std::vector<Reader::StructuredError> allErrors;
811  for (const auto& error : errors_) {
812  Reader::StructuredError structured;
813  structured.offset_start = error.token_.start_ - begin_;
814  structured.offset_limit = error.token_.end_ - begin_;
815  structured.message = error.message_;
816  allErrors.push_back(structured);
817  }
818  return allErrors;
819 }
820 
821 bool Reader::pushError(const Value& value, const String& message) {
822  ptrdiff_t const length = end_ - begin_;
823  if (value.getOffsetStart() > length || value.getOffsetLimit() > length)
824  return false;
825  Token token;
826  token.type_ = tokenError;
827  token.start_ = begin_ + value.getOffsetStart();
828  token.end_ = begin_ + value.getOffsetLimit();
829  ErrorInfo info;
830  info.token_ = token;
831  info.message_ = message;
832  info.extra_ = nullptr;
833  errors_.push_back(info);
834  return true;
835 }
836 
837 bool Reader::pushError(const Value& value, const String& message,
838  const Value& extra) {
839  ptrdiff_t const length = end_ - begin_;
840  if (value.getOffsetStart() > length || value.getOffsetLimit() > length ||
841  extra.getOffsetLimit() > length)
842  return false;
843  Token token;
844  token.type_ = tokenError;
845  token.start_ = begin_ + value.getOffsetStart();
846  token.end_ = begin_ + value.getOffsetLimit();
847  ErrorInfo info;
848  info.token_ = token;
849  info.message_ = message;
850  info.extra_ = begin_ + extra.getOffsetStart();
851  errors_.push_back(info);
852  return true;
853 }
854 
// Returns true when no errors are currently recorded.
bool Reader::good() const { return errors_.empty(); }
856 
857 // Originally copied from the Features class (now deprecated), used internally
858 // for features implementation.
// Parser option set used by OurReader.
// Originally copied from the Features class (now deprecated), used internally
// for features implementation.
class OurFeatures {
public:
  // Returns a value-initialized option set (see the definition below).
  static OurFeatures all();
  bool allowComments_;                // gates comment skipping and collection
  bool allowTrailingCommas_;
  bool strictRoot_;                   // root must be an array or an object
  bool allowDroppedNullPlaceholders_; // a missing value is read as null
  bool allowNumericKeys_;
  bool allowSingleQuotes_;
  bool failIfExtra_;                  // error on non-whitespace after the value
  bool rejectDupKeys_;
  bool allowSpecialFloats_;
  bool skipBom_;                      // passed to OurReader::skipBom()
  size_t stackLimit_;                 // nesting limit checked in readValue()
}; // OurFeatures
874 
875 OurFeatures OurFeatures::all() { return {}; }
876 
877 // Implementation of class OurReader
878 // ////////////////////////////////
879 
880 // Originally copied from the Reader class (now deprecated), used internally
881 // for implementing JSON reading.
882 class OurReader {
883 public:
884  using Char = char;
885  using Location = const Char*;
886  struct StructuredError {
887  ptrdiff_t offset_start;
888  ptrdiff_t offset_limit;
889  String message;
890  };
891 
892  explicit OurReader(OurFeatures const& features);
893  bool parse(const char* beginDoc, const char* endDoc, Value& root,
894  bool collectComments = true);
895  String getFormattedErrorMessages() const;
896  std::vector<StructuredError> getStructuredErrors() const;
897 
898 private:
899  OurReader(OurReader const&); // no impl
900  void operator=(OurReader const&); // no impl
901 
902  enum TokenType {
903  tokenEndOfStream = 0,
904  tokenObjectBegin,
905  tokenObjectEnd,
906  tokenArrayBegin,
907  tokenArrayEnd,
908  tokenString,
909  tokenNumber,
910  tokenTrue,
911  tokenFalse,
912  tokenNull,
913  tokenNaN,
914  tokenPosInf,
915  tokenNegInf,
916  tokenArraySeparator,
917  tokenMemberSeparator,
918  tokenComment,
919  tokenError
920  };
921 
922  class Token {
923  public:
924  TokenType type_;
925  Location start_;
926  Location end_;
927  };
928 
929  class ErrorInfo {
930  public:
931  Token token_;
932  String message_;
933  Location extra_;
934  };
935 
936  using Errors = std::deque<ErrorInfo>;
937 
938  bool readToken(Token& token);
939  void skipSpaces();
940  void skipBom(bool skipBom);
941  bool match(const Char* pattern, int patternLength);
942  bool readComment();
943  bool readCStyleComment(bool* containsNewLineResult);
944  bool readCppStyleComment();
945  bool readString();
946  bool readStringSingleQuote();
947  bool readNumber(bool checkInf);
948  bool readValue();
949  bool readObject(Token& token);
950  bool readArray(Token& token);
951  bool decodeNumber(Token& token);
952  bool decodeNumber(Token& token, Value& decoded);
953  bool decodeString(Token& token);
954  bool decodeString(Token& token, String& decoded);
955  bool decodeDouble(Token& token);
956  bool decodeDouble(Token& token, Value& decoded);
957  bool decodeUnicodeCodePoint(Token& token, Location& current, Location end,
958  unsigned int& unicode);
959  bool decodeUnicodeEscapeSequence(Token& token, Location& current,
960  Location end, unsigned int& unicode);
961  bool addError(const String& message, Token& token, Location extra = nullptr);
962  bool recoverFromError(TokenType skipUntilToken);
963  bool addErrorAndRecover(const String& message, Token& token,
964  TokenType skipUntilToken);
965  void skipUntilSpace();
966  Value& currentValue();
967  Char getNextChar();
968  void getLocationLineAndColumn(Location location, int& line,
969  int& column) const;
970  String getLocationLineAndColumn(Location location) const;
971  void addComment(Location begin, Location end, CommentPlacement placement);
972  void skipCommentTokens(Token& token);
973 
974  static String normalizeEOL(Location begin, Location end);
975  static bool containsNewLine(Location begin, Location end);
976 
977  using Nodes = std::stack<Value*>;
978 
979  Nodes nodes_{};
980  Errors errors_{};
981  String document_{};
982  Location begin_ = nullptr;
983  Location end_ = nullptr;
984  Location current_ = nullptr;
985  Location lastValueEnd_ = nullptr;
986  Value* lastValue_ = nullptr;
987  bool lastValueHasAComment_ = false;
988  String commentsBefore_{};
989 
990  OurFeatures const features_;
991  bool collectComments_ = false;
992 }; // OurReader
993 
994 // Complete copy of the Reader implementation, adapted for OurReader.
995 
996 bool OurReader::containsNewLine(OurReader::Location begin,
997  OurReader::Location end) {
998  return std::any_of(begin, end, [](char b) { return b == '\n' || b == '\r'; });
999 }
1000 
// Constructs a reader that uses a copy of the supplied option set.
OurReader::OurReader(OurFeatures const& features) : features_(features) {}
1002 
1003 bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root,
1004  bool collectComments) {
1005  if (!features_.allowComments_) {
1006  collectComments = false;
1007  }
1008 
1009  begin_ = beginDoc;
1010  end_ = endDoc;
1011  collectComments_ = collectComments;
1012  current_ = begin_;
1013  lastValueEnd_ = nullptr;
1014  lastValue_ = nullptr;
1015  commentsBefore_.clear();
1016  errors_.clear();
1017  while (!nodes_.empty())
1018  nodes_.pop();
1019  nodes_.push(&root);
1020 
1021  // skip byte order mark if it exists at the beginning of the UTF-8 text.
1022  skipBom(features_.skipBom_);
1023  bool successful = readValue();
1024  nodes_.pop();
1025  Token token;
1026  skipCommentTokens(token);
1027  if (features_.failIfExtra_ && (token.type_ != tokenEndOfStream)) {
1028  addError("Extra non-whitespace after JSON value.", token);
1029  return false;
1030  }
1031  if (collectComments_ && !commentsBefore_.empty())
1032  root.setComment(commentsBefore_, commentAfter);
1033  if (features_.strictRoot_) {
1034  if (!root.isArray() && !root.isObject()) {
1035  // Set error location to start of doc, ideally should be first token found
1036  // in doc
1037  token.type_ = tokenError;
1038  token.start_ = beginDoc;
1039  token.end_ = endDoc;
1040  addError(
1041  "A valid JSON document must be either an array or an object value.",
1042  token);
1043  return false;
1044  }
1045  }
1046  return successful;
1047 }
1048 
1049 bool OurReader::readValue() {
1050  // To preserve the old behaviour we cast size_t to int.
1051  if (nodes_.size() > features_.stackLimit_)
1052  throwRuntimeError("Exceeded stackLimit in readValue().");
1053  Token token;
1054  skipCommentTokens(token);
1055  bool successful = true;
1056 
1057  if (collectComments_ && !commentsBefore_.empty()) {
1058  currentValue().setComment(commentsBefore_, commentBefore);
1059  commentsBefore_.clear();
1060  }
1061 
1062  switch (token.type_) {
1063  case tokenObjectBegin:
1064  successful = readObject(token);
1065  currentValue().setOffsetLimit(current_ - begin_);
1066  break;
1067  case tokenArrayBegin:
1068  successful = readArray(token);
1069  currentValue().setOffsetLimit(current_ - begin_);
1070  break;
1071  case tokenNumber:
1072  successful = decodeNumber(token);
1073  break;
1074  case tokenString:
1075  successful = decodeString(token);
1076  break;
1077  case tokenTrue: {
1078  Value v(true);
1079  currentValue().swapPayload(v);
1080  currentValue().setOffsetStart(token.start_ - begin_);
1081  currentValue().setOffsetLimit(token.end_ - begin_);
1082  } break;
1083  case tokenFalse: {
1084  Value v(false);
1085  currentValue().swapPayload(v);
1086  currentValue().setOffsetStart(token.start_ - begin_);
1087  currentValue().setOffsetLimit(token.end_ - begin_);
1088  } break;
1089  case tokenNull: {
1090  Value v;
1091  currentValue().swapPayload(v);
1092  currentValue().setOffsetStart(token.start_ - begin_);
1093  currentValue().setOffsetLimit(token.end_ - begin_);
1094  } break;
1095  case tokenNaN: {
1096  Value v(std::numeric_limits<double>::quiet_NaN());
1097  currentValue().swapPayload(v);
1098  currentValue().setOffsetStart(token.start_ - begin_);
1099  currentValue().setOffsetLimit(token.end_ - begin_);
1100  } break;
1101  case tokenPosInf: {
1102  Value v(std::numeric_limits<double>::infinity());
1103  currentValue().swapPayload(v);
1104  currentValue().setOffsetStart(token.start_ - begin_);
1105  currentValue().setOffsetLimit(token.end_ - begin_);
1106  } break;
1107  case tokenNegInf: {
1108  Value v(-std::numeric_limits<double>::infinity());
1109  currentValue().swapPayload(v);
1110  currentValue().setOffsetStart(token.start_ - begin_);
1111  currentValue().setOffsetLimit(token.end_ - begin_);
1112  } break;
1113  case tokenArraySeparator:
1114  case tokenObjectEnd:
1115  case tokenArrayEnd:
1116  if (features_.allowDroppedNullPlaceholders_) {
1117  // "Un-read" the current token and mark the current value as a null
1118  // token.
1119  current_--;
1120  Value v;
1121  currentValue().swapPayload(v);
1122  currentValue().setOffsetStart(current_ - begin_ - 1);
1123  currentValue().setOffsetLimit(current_ - begin_);
1124  break;
1125  } // else, fall through ...
1126  default:
1127  currentValue().setOffsetStart(token.start_ - begin_);
1128  currentValue().setOffsetLimit(token.end_ - begin_);
1129  return addError("Syntax error: value, object or array expected.", token);
1130  }
1131 
1132  if (collectComments_) {
1133  lastValueEnd_ = current_;
1134  lastValueHasAComment_ = false;
1135  lastValue_ = &currentValue();
1136  }
1137 
1138  return successful;
1139 }
1140 
1141 void OurReader::skipCommentTokens(Token& token) {
1142  if (features_.allowComments_) {
1143  do {
1144  readToken(token);
1145  } while (token.type_ == tokenComment);
1146  } else {
1147  readToken(token);
1148  }
1149 }
1150 
1151 bool OurReader::readToken(Token& token) {
1152  skipSpaces();
1153  token.start_ = current_;
1154  Char c = getNextChar();
1155  bool ok = true;
1156  switch (c) {
1157  case '{':
1158  token.type_ = tokenObjectBegin;
1159  break;
1160  case '}':
1161  token.type_ = tokenObjectEnd;
1162  break;
1163  case '[':
1164  token.type_ = tokenArrayBegin;
1165  break;
1166  case ']':
1167  token.type_ = tokenArrayEnd;
1168  break;
1169  case '"':
1170  token.type_ = tokenString;
1171  ok = readString();
1172  break;
1173  case '\'':
1174  if (features_.allowSingleQuotes_) {
1175  token.type_ = tokenString;
1176  ok = readStringSingleQuote();
1177  } else {
1178  // If we don't allow single quotes, this is a failure case.
1179  ok = false;
1180  }
1181  break;
1182  case '/':
1183  token.type_ = tokenComment;
1184  ok = readComment();
1185  break;
1186  case '0':
1187  case '1':
1188  case '2':
1189  case '3':
1190  case '4':
1191  case '5':
1192  case '6':
1193  case '7':
1194  case '8':
1195  case '9':
1196  token.type_ = tokenNumber;
1197  readNumber(false);
1198  break;
1199  case '-':
1200  if (readNumber(true)) {
1201  token.type_ = tokenNumber;
1202  } else {
1203  token.type_ = tokenNegInf;
1204  ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1205  }
1206  break;
1207  case '+':
1208  if (readNumber(true)) {
1209  token.type_ = tokenNumber;
1210  } else {
1211  token.type_ = tokenPosInf;
1212  ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1213  }
1214  break;
1215  case 't':
1216  token.type_ = tokenTrue;
1217  ok = match("rue", 3);
1218  break;
1219  case 'f':
1220  token.type_ = tokenFalse;
1221  ok = match("alse", 4);
1222  break;
1223  case 'n':
1224  token.type_ = tokenNull;
1225  ok = match("ull", 3);
1226  break;
1227  case 'N':
1228  if (features_.allowSpecialFloats_) {
1229  token.type_ = tokenNaN;
1230  ok = match("aN", 2);
1231  } else {
1232  ok = false;
1233  }
1234  break;
1235  case 'I':
1236  if (features_.allowSpecialFloats_) {
1237  token.type_ = tokenPosInf;
1238  ok = match("nfinity", 7);
1239  } else {
1240  ok = false;
1241  }
1242  break;
1243  case ',':
1244  token.type_ = tokenArraySeparator;
1245  break;
1246  case ':':
1247  token.type_ = tokenMemberSeparator;
1248  break;
1249  case 0:
1250  token.type_ = tokenEndOfStream;
1251  break;
1252  default:
1253  ok = false;
1254  break;
1255  }
1256  if (!ok)
1257  token.type_ = tokenError;
1258  token.end_ = current_;
1259  return ok;
1260 }
1261 
1262 void OurReader::skipSpaces() {
1263  while (current_ != end_) {
1264  Char c = *current_;
1265  if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
1266  ++current_;
1267  else
1268  break;
1269  }
1270 }
1271 
1272 void OurReader::skipBom(bool skipBom) {
1273  // The default behavior is to skip BOM.
1274  if (skipBom) {
1275  if ((end_ - begin_) >= 3 && strncmp(begin_, "\xEF\xBB\xBF", 3) == 0) {
1276  begin_ += 3;
1277  current_ = begin_;
1278  }
1279  }
1280 }
1281 
1282 bool OurReader::match(const Char* pattern, int patternLength) {
1283  if (end_ - current_ < patternLength)
1284  return false;
1285  int index = patternLength;
1286  while (index--)
1287  if (current_[index] != pattern[index])
1288  return false;
1289  current_ += patternLength;
1290  return true;
1291 }
1292 
1293 bool OurReader::readComment() {
1294  const Location commentBegin = current_ - 1;
1295  const Char c = getNextChar();
1296  bool successful = false;
1297  bool cStyleWithEmbeddedNewline = false;
1298 
1299  const bool isCStyleComment = (c == '*');
1300  const bool isCppStyleComment = (c == '/');
1301  if (isCStyleComment) {
1302  successful = readCStyleComment(&cStyleWithEmbeddedNewline);
1303  } else if (isCppStyleComment) {
1304  successful = readCppStyleComment();
1305  }
1306 
1307  if (!successful)
1308  return false;
1309 
1310  if (collectComments_) {
1311  CommentPlacement placement = commentBefore;
1312 
1313  if (!lastValueHasAComment_) {
1314  if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1315  if (isCppStyleComment || !cStyleWithEmbeddedNewline) {
1316  placement = commentAfterOnSameLine;
1317  lastValueHasAComment_ = true;
1318  }
1319  }
1320  }
1321 
1322  addComment(commentBegin, current_, placement);
1323  }
1324  return true;
1325 }
1326 
1327 String OurReader::normalizeEOL(OurReader::Location begin,
1328  OurReader::Location end) {
1329  String normalized;
1330  normalized.reserve(static_cast<size_t>(end - begin));
1331  OurReader::Location current = begin;
1332  while (current != end) {
1333  char c = *current++;
1334  if (c == '\r') {
1335  if (current != end && *current == '\n')
1336  // convert dos EOL
1337  ++current;
1338  // convert Mac EOL
1339  normalized += '\n';
1340  } else {
1341  normalized += c;
1342  }
1343  }
1344  return normalized;
1345 }
1346 
1347 void OurReader::addComment(Location begin, Location end,
1348  CommentPlacement placement) {
1349  assert(collectComments_);
1350  const String& normalized = normalizeEOL(begin, end);
1351  if (placement == commentAfterOnSameLine) {
1352  assert(lastValue_ != nullptr);
1353  lastValue_->setComment(normalized, placement);
1354  } else {
1355  commentsBefore_ += normalized;
1356  }
1357 }
1358 
1359 bool OurReader::readCStyleComment(bool* containsNewLineResult) {
1360  *containsNewLineResult = false;
1361 
1362  while ((current_ + 1) < end_) {
1363  Char c = getNextChar();
1364  if (c == '*' && *current_ == '/')
1365  break;
1366  if (c == '\n')
1367  *containsNewLineResult = true;
1368  }
1369 
1370  return getNextChar() == '/';
1371 }
1372 
1373 bool OurReader::readCppStyleComment() {
1374  while (current_ != end_) {
1375  Char c = getNextChar();
1376  if (c == '\n')
1377  break;
1378  if (c == '\r') {
1379  // Consume DOS EOL. It will be normalized in addComment.
1380  if (current_ != end_ && *current_ == '\n')
1381  getNextChar();
1382  // Break on Moc OS 9 EOL.
1383  break;
1384  }
1385  }
1386  return true;
1387 }
1388 
1389 bool OurReader::readNumber(bool checkInf) {
1390  Location p = current_;
1391  if (checkInf && p != end_ && *p == 'I') {
1392  current_ = ++p;
1393  return false;
1394  }
1395  char c = '0'; // stopgap for already consumed character
1396  // integral part
1397  while (c >= '0' && c <= '9')
1398  c = (current_ = p) < end_ ? *p++ : '\0';
1399  // fractional part
1400  if (c == '.') {
1401  c = (current_ = p) < end_ ? *p++ : '\0';
1402  while (c >= '0' && c <= '9')
1403  c = (current_ = p) < end_ ? *p++ : '\0';
1404  }
1405  // exponential part
1406  if (c == 'e' || c == 'E') {
1407  c = (current_ = p) < end_ ? *p++ : '\0';
1408  if (c == '+' || c == '-')
1409  c = (current_ = p) < end_ ? *p++ : '\0';
1410  while (c >= '0' && c <= '9')
1411  c = (current_ = p) < end_ ? *p++ : '\0';
1412  }
1413  return true;
1414 }
1415 bool OurReader::readString() {
1416  Char c = 0;
1417  while (current_ != end_) {
1418  c = getNextChar();
1419  if (c == '\\')
1420  getNextChar();
1421  else if (c == '"')
1422  break;
1423  }
1424  return c == '"';
1425 }
1426 
1427 bool OurReader::readStringSingleQuote() {
1428  Char c = 0;
1429  while (current_ != end_) {
1430  c = getNextChar();
1431  if (c == '\\')
1432  getNextChar();
1433  else if (c == '\'')
1434  break;
1435  }
1436  return c == '\'';
1437 }
1438 
1439 bool OurReader::readObject(Token& token) {
1440  Token tokenName;
1441  String name;
1442  Value init(objectValue);
1443  currentValue().swapPayload(init);
1444  currentValue().setOffsetStart(token.start_ - begin_);
1445  while (readToken(tokenName)) {
1446  bool initialTokenOk = true;
1447  while (tokenName.type_ == tokenComment && initialTokenOk)
1448  initialTokenOk = readToken(tokenName);
1449  if (!initialTokenOk)
1450  break;
1451  if (tokenName.type_ == tokenObjectEnd &&
1452  (name.empty() ||
1453  features_.allowTrailingCommas_)) // empty object or trailing comma
1454  return true;
1455  name.clear();
1456  if (tokenName.type_ == tokenString) {
1457  if (!decodeString(tokenName, name))
1458  return recoverFromError(tokenObjectEnd);
1459  } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
1460  Value numberName;
1461  if (!decodeNumber(tokenName, numberName))
1462  return recoverFromError(tokenObjectEnd);
1463  name = numberName.asString();
1464  } else {
1465  break;
1466  }
1467  if (name.length() >= (1U << 30))
1468  throwRuntimeError("keylength >= 2^30");
1469  if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
1470  String msg = "Duplicate key: '" + name + "'";
1471  return addErrorAndRecover(msg, tokenName, tokenObjectEnd);
1472  }
1473 
1474  Token colon;
1475  if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
1476  return addErrorAndRecover("Missing ':' after object member name", colon,
1477  tokenObjectEnd);
1478  }
1479  Value& value = currentValue()[name];
1480  nodes_.push(&value);
1481  bool ok = readValue();
1482  nodes_.pop();
1483  if (!ok) // error already set
1484  return recoverFromError(tokenObjectEnd);
1485 
1486  Token comma;
1487  if (!readToken(comma) ||
1488  (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
1489  comma.type_ != tokenComment)) {
1490  return addErrorAndRecover("Missing ',' or '}' in object declaration",
1491  comma, tokenObjectEnd);
1492  }
1493  bool finalizeTokenOk = true;
1494  while (comma.type_ == tokenComment && finalizeTokenOk)
1495  finalizeTokenOk = readToken(comma);
1496  if (comma.type_ == tokenObjectEnd)
1497  return true;
1498  }
1499  return addErrorAndRecover("Missing '}' or object member name", tokenName,
1500  tokenObjectEnd);
1501 }
1502 
1503 bool OurReader::readArray(Token& token) {
1504  Value init(arrayValue);
1505  currentValue().swapPayload(init);
1506  currentValue().setOffsetStart(token.start_ - begin_);
1507  int index = 0;
1508  for (;;) {
1509  skipSpaces();
1510  if (current_ != end_ && *current_ == ']' &&
1511  (index == 0 ||
1512  (features_.allowTrailingCommas_ &&
1513  !features_.allowDroppedNullPlaceholders_))) // empty array or trailing
1514  // comma
1515  {
1516  Token endArray;
1517  readToken(endArray);
1518  return true;
1519  }
1520  Value& value = currentValue()[index++];
1521  nodes_.push(&value);
1522  bool ok = readValue();
1523  nodes_.pop();
1524  if (!ok) // error already set
1525  return recoverFromError(tokenArrayEnd);
1526 
1527  Token currentToken;
1528  // Accept Comment after last item in the array.
1529  ok = readToken(currentToken);
1530  while (currentToken.type_ == tokenComment && ok) {
1531  ok = readToken(currentToken);
1532  }
1533  bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
1534  currentToken.type_ != tokenArrayEnd);
1535  if (!ok || badTokenType) {
1536  return addErrorAndRecover("Missing ',' or ']' in array declaration",
1537  currentToken, tokenArrayEnd);
1538  }
1539  if (currentToken.type_ == tokenArrayEnd)
1540  break;
1541  }
1542  return true;
1543 }
1544 
1545 bool OurReader::decodeNumber(Token& token) {
1546  Value decoded;
1547  if (!decodeNumber(token, decoded))
1548  return false;
1549  currentValue().swapPayload(decoded);
1550  currentValue().setOffsetStart(token.start_ - begin_);
1551  currentValue().setOffsetLimit(token.end_ - begin_);
1552  return true;
1553 }
1554 
1555 bool OurReader::decodeNumber(Token& token, Value& decoded) {
1556  // Attempts to parse the number as an integer. If the number is
1557  // larger than the maximum supported value of an integer then
1558  // we decode the number as a double.
1559  Location current = token.start_;
1560  const bool isNegative = *current == '-';
1561  if (isNegative) {
1562  ++current;
1563  }
1564 
1565  // We assume we can represent the largest and smallest integer types as
1566  // unsigned integers with separate sign. This is only true if they can fit
1567  // into an unsigned integer.
1569  "Int must be smaller than UInt");
1570 
1571  // We need to convert minLargestInt into a positive number. The easiest way
1572  // to do this conversion is to assume our "threshold" value of minLargestInt
1573  // divided by 10 can fit in maxLargestInt when absolute valued. This should
1574  // be a safe assumption.
1575  static_assert(Value::minLargestInt <= -Value::maxLargestInt,
1576  "The absolute value of minLargestInt must be greater than or "
1577  "equal to maxLargestInt");
1578  static_assert(Value::minLargestInt / 10 >= -Value::maxLargestInt,
1579  "The absolute value of minLargestInt must be only 1 magnitude "
1580  "larger than maxLargest Int");
1581 
1582  static constexpr Value::LargestUInt positive_threshold =
1583  Value::maxLargestUInt / 10;
1584  static constexpr Value::UInt positive_last_digit = Value::maxLargestUInt % 10;
1585 
1586  // For the negative values, we have to be more careful. Since typically
1587  // -Value::minLargestInt will cause an overflow, we first divide by 10 and
1588  // then take the inverse. This assumes that minLargestInt is only a single
1589  // power of 10 different in magnitude, which we check above. For the last
1590  // digit, we take the modulus before negating for the same reason.
1591  static constexpr auto negative_threshold =
1593  static constexpr auto negative_last_digit =
1595 
1596  const Value::LargestUInt threshold =
1597  isNegative ? negative_threshold : positive_threshold;
1598  const Value::UInt max_last_digit =
1599  isNegative ? negative_last_digit : positive_last_digit;
1600 
1601  Value::LargestUInt value = 0;
1602  while (current < token.end_) {
1603  Char c = *current++;
1604  if (c < '0' || c > '9')
1605  return decodeDouble(token, decoded);
1606 
1607  const auto digit(static_cast<Value::UInt>(c - '0'));
1608  if (value >= threshold) {
1609  // We've hit or exceeded the max value divided by 10 (rounded down). If
1610  // a) we've only just touched the limit, meaing value == threshold,
1611  // b) this is the last digit, or
1612  // c) it's small enough to fit in that rounding delta, we're okay.
1613  // Otherwise treat this number as a double to avoid overflow.
1614  if (value > threshold || current != token.end_ ||
1615  digit > max_last_digit) {
1616  return decodeDouble(token, decoded);
1617  }
1618  }
1619  value = value * 10 + digit;
1620  }
1621 
1622  if (isNegative) {
1623  // We use the same magnitude assumption here, just in case.
1624  const auto last_digit = static_cast<Value::UInt>(value % 10);
1625  decoded = -Value::LargestInt(value / 10) * 10 - last_digit;
1626  } else if (value <= Value::LargestUInt(Value::maxLargestInt)) {
1627  decoded = Value::LargestInt(value);
1628  } else {
1629  decoded = value;
1630  }
1631 
1632  return true;
1633 }
1634 
1635 bool OurReader::decodeDouble(Token& token) {
1636  Value decoded;
1637  if (!decodeDouble(token, decoded))
1638  return false;
1639  currentValue().swapPayload(decoded);
1640  currentValue().setOffsetStart(token.start_ - begin_);
1641  currentValue().setOffsetLimit(token.end_ - begin_);
1642  return true;
1643 }
1644 
1645 bool OurReader::decodeDouble(Token& token, Value& decoded) {
1646  double value = 0;
1647  const String buffer(token.start_, token.end_);
1648  IStringStream is(buffer);
1649  if (!(is >> value)) {
1650  return addError(
1651  "'" + String(token.start_, token.end_) + "' is not a number.", token);
1652  }
1653  decoded = value;
1654  return true;
1655 }
1656 
1657 bool OurReader::decodeString(Token& token) {
1658  String decoded_string;
1659  if (!decodeString(token, decoded_string))
1660  return false;
1661  Value decoded(decoded_string);
1662  currentValue().swapPayload(decoded);
1663  currentValue().setOffsetStart(token.start_ - begin_);
1664  currentValue().setOffsetLimit(token.end_ - begin_);
1665  return true;
1666 }
1667 
1668 bool OurReader::decodeString(Token& token, String& decoded) {
1669  decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
1670  Location current = token.start_ + 1; // skip '"'
1671  Location end = token.end_ - 1; // do not include '"'
1672  while (current != end) {
1673  Char c = *current++;
1674  if (c == '"')
1675  break;
1676  if (c == '\\') {
1677  if (current == end)
1678  return addError("Empty escape sequence in string", token, current);
1679  Char escape = *current++;
1680  switch (escape) {
1681  case '"':
1682  decoded += '"';
1683  break;
1684  case '/':
1685  decoded += '/';
1686  break;
1687  case '\\':
1688  decoded += '\\';
1689  break;
1690  case 'b':
1691  decoded += '\b';
1692  break;
1693  case 'f':
1694  decoded += '\f';
1695  break;
1696  case 'n':
1697  decoded += '\n';
1698  break;
1699  case 'r':
1700  decoded += '\r';
1701  break;
1702  case 't':
1703  decoded += '\t';
1704  break;
1705  case 'u': {
1706  unsigned int unicode;
1707  if (!decodeUnicodeCodePoint(token, current, end, unicode))
1708  return false;
1709  decoded += codePointToUTF8(unicode);
1710  } break;
1711  default:
1712  return addError("Bad escape sequence in string", token, current);
1713  }
1714  } else {
1715  decoded += c;
1716  }
1717  }
1718  return true;
1719 }
1720 
1721 bool OurReader::decodeUnicodeCodePoint(Token& token, Location& current,
1722  Location end, unsigned int& unicode) {
1723 
1724  if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
1725  return false;
1726  if (unicode >= 0xD800 && unicode <= 0xDBFF) {
1727  // surrogate pairs
1728  if (end - current < 6)
1729  return addError(
1730  "additional six characters expected to parse unicode surrogate pair.",
1731  token, current);
1732  if (*(current++) == '\\' && *(current++) == 'u') {
1733  unsigned int surrogatePair;
1734  if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
1735  unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
1736  } else
1737  return false;
1738  } else
1739  return addError("expecting another \\u token to begin the second half of "
1740  "a unicode surrogate pair",
1741  token, current);
1742  }
1743  return true;
1744 }
1745 
1746 bool OurReader::decodeUnicodeEscapeSequence(Token& token, Location& current,
1747  Location end,
1748  unsigned int& ret_unicode) {
1749  if (end - current < 4)
1750  return addError(
1751  "Bad unicode escape sequence in string: four digits expected.", token,
1752  current);
1753  int unicode = 0;
1754  for (int index = 0; index < 4; ++index) {
1755  Char c = *current++;
1756  unicode *= 16;
1757  if (c >= '0' && c <= '9')
1758  unicode += c - '0';
1759  else if (c >= 'a' && c <= 'f')
1760  unicode += c - 'a' + 10;
1761  else if (c >= 'A' && c <= 'F')
1762  unicode += c - 'A' + 10;
1763  else
1764  return addError(
1765  "Bad unicode escape sequence in string: hexadecimal digit expected.",
1766  token, current);
1767  }
1768  ret_unicode = static_cast<unsigned int>(unicode);
1769  return true;
1770 }
1771 
1772 bool OurReader::addError(const String& message, Token& token, Location extra) {
1773  ErrorInfo info;
1774  info.token_ = token;
1775  info.message_ = message;
1776  info.extra_ = extra;
1777  errors_.push_back(info);
1778  return false;
1779 }
1780 
1781 bool OurReader::recoverFromError(TokenType skipUntilToken) {
1782  size_t errorCount = errors_.size();
1783  Token skip;
1784  for (;;) {
1785  if (!readToken(skip))
1786  errors_.resize(errorCount); // discard errors caused by recovery
1787  if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
1788  break;
1789  }
1790  errors_.resize(errorCount);
1791  return false;
1792 }
1793 
1794 bool OurReader::addErrorAndRecover(const String& message, Token& token,
1795  TokenType skipUntilToken) {
1796  addError(message, token);
1797  return recoverFromError(skipUntilToken);
1798 }
1799 
1800 Value& OurReader::currentValue() { return *(nodes_.top()); }
1801 
1802 OurReader::Char OurReader::getNextChar() {
1803  if (current_ == end_)
1804  return 0;
1805  return *current_++;
1806 }
1807 
1808 void OurReader::getLocationLineAndColumn(Location location, int& line,
1809  int& column) const {
1810  Location current = begin_;
1811  Location lastLineStart = current;
1812  line = 0;
1813  while (current < location && current != end_) {
1814  Char c = *current++;
1815  if (c == '\r') {
1816  if (*current == '\n')
1817  ++current;
1818  lastLineStart = current;
1819  ++line;
1820  } else if (c == '\n') {
1821  lastLineStart = current;
1822  ++line;
1823  }
1824  }
1825  // column & line start at 1
1826  column = int(location - lastLineStart) + 1;
1827  ++line;
1828 }
1829 
1830 String OurReader::getLocationLineAndColumn(Location location) const {
1831  int line, column;
1832  getLocationLineAndColumn(location, line, column);
1833  char buffer[18 + 16 + 16 + 1];
1834  jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1835  return buffer;
1836 }
1837 
1838 String OurReader::getFormattedErrorMessages() const {
1839  String formattedMessage;
1840  for (const auto& error : errors_) {
1841  formattedMessage +=
1842  "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
1843  formattedMessage += " " + error.message_ + "\n";
1844  if (error.extra_)
1845  formattedMessage +=
1846  "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
1847  }
1848  return formattedMessage;
1849 }
1850 
1851 std::vector<OurReader::StructuredError> OurReader::getStructuredErrors() const {
1852  std::vector<OurReader::StructuredError> allErrors;
1853  for (const auto& error : errors_) {
1854  OurReader::StructuredError structured;
1855  structured.offset_start = error.token_.start_ - begin_;
1856  structured.offset_limit = error.token_.end_ - begin_;
1857  structured.message = error.message_;
1858  allErrors.push_back(structured);
1859  }
1860  return allErrors;
1861 }
1862 
1863 class OurCharReader : public CharReader {
1864  bool const collectComments_;
1865  OurReader reader_;
1866 
1867 public:
1868  OurCharReader(bool collectComments, OurFeatures const& features)
1869  : collectComments_(collectComments), reader_(features) {}
1870  bool parse(char const* beginDoc, char const* endDoc, Value* root,
1871  String* errs) override {
1872  bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
1873  if (errs) {
1874  *errs = reader_.getFormattedErrorMessages();
1875  }
1876  return ok;
1877  }
1878 };
1879 
1883  bool collectComments = settings_["collectComments"].asBool();
1884  OurFeatures features = OurFeatures::all();
1885  features.allowComments_ = settings_["allowComments"].asBool();
1886  features.allowTrailingCommas_ = settings_["allowTrailingCommas"].asBool();
1887  features.strictRoot_ = settings_["strictRoot"].asBool();
1888  features.allowDroppedNullPlaceholders_ =
1889  settings_["allowDroppedNullPlaceholders"].asBool();
1890  features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
1891  features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
1892 
1893  // Stack limit is always a size_t, so we get this as an unsigned int
1894  // regardless of it we have 64-bit integer support enabled.
1895  features.stackLimit_ = static_cast<size_t>(settings_["stackLimit"].asUInt());
1896  features.failIfExtra_ = settings_["failIfExtra"].asBool();
1897  features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
1898  features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool();
1899  features.skipBom_ = settings_["skipBom"].asBool();
1900  return new OurCharReader(collectComments, features);
1901 }
1902 
1904  static const auto& valid_keys = *new std::set<String>{
1905  "collectComments",
1906  "allowComments",
1907  "allowTrailingCommas",
1908  "strictRoot",
1909  "allowDroppedNullPlaceholders",
1910  "allowNumericKeys",
1911  "allowSingleQuotes",
1912  "stackLimit",
1913  "failIfExtra",
1914  "rejectDupKeys",
1915  "allowSpecialFloats",
1916  "skipBom",
1917  };
1918  for (auto si = settings_.begin(); si != settings_.end(); ++si) {
1919  auto key = si.name();
1920  if (valid_keys.count(key))
1921  continue;
1922  if (invalid)
1923  (*invalid)[key] = *si;
1924  else
1925  return false;
1926  }
1927  return invalid ? invalid->empty() : true;
1928 }
1929 
1931  return settings_[key];
1932 }
1933 // static
1936  (*settings)["allowComments"] = false;
1937  (*settings)["allowTrailingCommas"] = false;
1938  (*settings)["strictRoot"] = true;
1939  (*settings)["allowDroppedNullPlaceholders"] = false;
1940  (*settings)["allowNumericKeys"] = false;
1941  (*settings)["allowSingleQuotes"] = false;
1942  (*settings)["stackLimit"] = 1000;
1943  (*settings)["failIfExtra"] = true;
1944  (*settings)["rejectDupKeys"] = true;
1945  (*settings)["allowSpecialFloats"] = false;
1946  (*settings)["skipBom"] = true;
1948 }
1949 // static
1952  (*settings)["collectComments"] = true;
1953  (*settings)["allowComments"] = true;
1954  (*settings)["allowTrailingCommas"] = true;
1955  (*settings)["strictRoot"] = false;
1956  (*settings)["allowDroppedNullPlaceholders"] = false;
1957  (*settings)["allowNumericKeys"] = false;
1958  (*settings)["allowSingleQuotes"] = false;
1959  (*settings)["stackLimit"] = 1000;
1960  (*settings)["failIfExtra"] = false;
1961  (*settings)["rejectDupKeys"] = false;
1962  (*settings)["allowSpecialFloats"] = false;
1963  (*settings)["skipBom"] = true;
1965 }
1966 
1968 // global functions
1969 
1970 bool parseFromStream(CharReader::Factory const& fact, IStream& sin, Value* root,
1971  String* errs) {
1972  OStringStream ssin;
1973  ssin << sin.rdbuf();
1974  String doc = ssin.str();
1975  char const* begin = doc.data();
1976  char const* end = begin + doc.size();
1977  // Note that we do not actually need a null-terminator.
1978  CharReaderPtr const reader(fact.newCharReader());
1979  return reader->parse(begin, end, root, errs);
1980 }
1981 
1984  String errs;
1985  bool ok = parseFromStream(b, sin, &root, &errs);
1986  if (!ok) {
1987  throwRuntimeError(errs);
1988  }
1989  return sin;
1990 }
1991 
1992 } // namespace Json
virtual CharReader * newCharReader() const =0
Allocate a CharReader via operator new().
Build a CharReader implementation.
Definition: reader.h:288
static void setDefaults(Json::Value *settings)
Called by ctor, but you can use this to reset settings_.
Value & operator[](const String &key)
A simple way to update a specific setting.
CharReader * newCharReader() const override
Allocate a CharReader via operator new().
static void strictMode(Json::Value *settings)
Same as old Features::strictMode().
Json::Value settings_
Configuration of this builder.
Definition: reader.h:334
~CharReaderBuilder() override
bool validate(Json::Value *invalid) const
Interface for reading JSON from a char array.
Definition: reader.h:244
Configuration passed to reader and writer.
Definition: json_features.h:21
bool strictRoot_
true if root must be either an array or an object value.
Definition: json_features.h:48
bool allowComments_
true if comments are allowed. Default: true.
Definition: json_features.h:44
bool allowDroppedNullPlaceholders_
true if dropped null placeholders are allowed. Default: false.
Definition: json_features.h:51
static Features all()
A configuration that allows all features and assumes all strings are UTF-8.
Definition: json_reader.cpp:66
Features()
Initialize the configuration like JsonConfig::allFeatures.
static Features strictMode()
A configuration that is strictly compatible with the JSON specification.
Definition: json_reader.cpp:68
bool allowNumericKeys_
true if numeric object keys are allowed. Default: false.
Definition: json_features.h:54
char Char
Definition: reader.h:38
Reader()
Constructs a Reader allowing all features for parsing.
Definition: json_reader.cpp:87
bool pushError(const Value &value, const String &message)
Add a semantic error message.
bool good() const
Return whether there are any errors.
const Char * Location
Definition: reader.h:39
std::vector< StructuredError > getStructuredErrors() const
Returns a vector of structured errors encountered while parsing.
bool parse(const std::string &document, Value &root, bool collectComments=true)
Read a Value from a JSON document.
Definition: json_reader.cpp:91
String getFormattedErrorMessages() const
Returns a user-friendly string that lists the errors in the parsed document.
Represents a JSON value.
Definition: value.h:193
const_iterator begin() const
bool empty() const
Return true if empty array, empty object, or null; otherwise, false.
Definition: json_value.cpp:882
static constexpr LargestInt maxLargestInt
Maximum signed integer value that can be stored in a Json::Value.
Definition: value.h:226
Json::UInt UInt
Definition: value.h:200
bool isArray() const
void setComment(const char *comment, size_t len, CommentPlacement placement)
Comments must be //... or /* ... */.
Definition: value.h:570
ptrdiff_t getOffsetLimit() const
const_iterator end() const
void swapPayload(Value &other)
Swap values but leave comments and source offsets in place.
Definition: json_value.cpp:456
void setOffsetLimit(ptrdiff_t limit)
Json::LargestInt LargestInt
Definition: value.h:206
Json::LargestUInt LargestUInt
Definition: value.h:207
UInt asUInt() const
Definition: json_value.cpp:676
bool isObject() const
void setOffsetStart(ptrdiff_t start)
static constexpr Int maxInt
Maximum signed int value that can be stored in a Json::Value.
Definition: value.h:233
bool asBool() const
Definition: json_value.cpp:804
static constexpr LargestUInt maxLargestUInt
Maximum unsigned integer value that can be stored in a Json::Value.
Definition: value.h:228
static constexpr LargestInt minLargestInt
Minimum signed integer value that can be stored in a Json::Value.
Definition: value.h:223
ptrdiff_t getOffsetStart() const
#define jsoncpp_snprintf
Definition: config.h:63
#define JSONCPP_DEPRECATED_STACK_LIMIT
Definition: json_reader.cpp:47
static size_t const stackLimit_g
Definition: json_reader.cpp:50
JSON (JavaScript Object Notation).
Definition: allocator.h:14
std::basic_istringstream< String::value_type, String::traits_type, String::allocator_type > IStringStream
Definition: config.h:135
std::basic_ostringstream< String::value_type, String::traits_type, String::allocator_type > OStringStream
Definition: config.h:138
std::basic_string< char, std::char_traits< char >, Allocator< char > > String
Definition: config.h:132
CommentPlacement
Definition: value.h:118
@ commentAfterOnSameLine
a comment just after a value on the same line
Definition: value.h:120
@ commentBefore
a comment placed on the line before a value
Definition: value.h:119
@ commentAfter
a comment on the line after a value (only makes sense for the root value)
Definition: value.h:121
@ arrayValue
array value (ordered list)
Definition: value.h:114
@ objectValue
object value (collection of name/value pairs).
Definition: value.h:115
static String codePointToUTF8(unsigned int cp)
Converts a unicode code-point to UTF-8.
Definition: json_tool.h:39
IStream & operator>>(IStream &, Value &)
Read from 'sin' into 'root'.
bool parseFromStream(CharReader::Factory const &, IStream &, Value *root, String *errs)
Consume entire stream and use its begin/end.
std::istream IStream
Definition: config.h:139
std::auto_ptr< CharReader > CharReaderPtr
Definition: json_reader.cpp:58
An error tagged with where in the JSON text it was encountered.
Definition: reader.h:46