美文网首页
json_reader.cpp

json_reader.cpp

作者: 山的那边是什么_ | 来源:发表于2018-12-28 23:50 被阅读18次

@[TOC]

1. 代码思维导图

json_reader.png

2. 详解

下面对各个模块讲解,为了便于理解,从最下层开始;

2.1 parse-1

/*
* document 需要解析的string 字符串
* root 解析后的结果
* collectComments 说明信息
*/
bool Reader::parse(const std::string& document,
                   Value& root,
                   bool collectComments) {
  document_.assign(document.begin(), document.end());
  const char* begin = document_.c_str();
  const char* end = begin + document_.length();
  return parse(begin, end, root, collectComments);
}

2.2 parse-2

bool Reader::parse(const char* beginDoc,
                   const char* endDoc,
                   Value& root,
                   bool collectComments) {
  if (!features_.allowComments_) {
    collectComments = false;
  }
  // 相关值初始化
  begin_ = beginDoc;
  end_ = endDoc;
  collectComments_ = collectComments;
  current_ = begin_;
  lastValueEnd_ = 0;
  lastValue_ = 0;
  commentsBefore_.clear();
  errors_.clear();
  while (!nodes_.empty())
    nodes_.pop(); // 上次的结果出栈
  nodes_.push(&root); // 根节点入栈,第一次root 是空的,这个入栈的是地址;
  // readObject() or ReadArray().
  bool successful = readValue();
  Token token;
  skipCommentTokens(token);
  if (collectComments_ && !commentsBefore_.empty())
    root.setComment(commentsBefore_, commentAfter);
  if (features_.strictRoot_) {
    if (!root.isArray() && !root.isObject()) {
      // Set error location to start of doc, ideally should be first token found
      // in doc
      token.type_ = tokenError;
      token.start_ = beginDoc;
      token.end_ = endDoc;
      addError(
          "A valid JSON document must be either an array or an object value.",
          token);
      return false;
    }
  }
  return successful;
}

2.3 readValue()

/*
* 读取
*/
bool Reader::readValue() {
  // readValue() may call itself only if it calls readObject() or ReadArray().
  // These methods execute nodes_.push() just before and nodes_.pop)() just
  // after calling readValue(). parse() executes one nodes_.push(), so > instead
  // of >=.
  if (nodes_.size() > stackLimit_g)
    throwRuntimeError("Exceeded stackLimit in readValue().");

  Token token;
  skipCommentTokens(token);
  bool successful = true;

  if (collectComments_ && !commentsBefore_.empty()) {
    currentValue().setComment(commentsBefore_, commentBefore);
    commentsBefore_.clear();
  }

  switch (token.type_) {
  case tokenObjectBegin:
    successful = readObject(token);
    currentValue().setOffsetLimit(current_ - begin_);
    break;
  case tokenArrayBegin:
    successful = readArray(token);
    currentValue().setOffsetLimit(current_ - begin_);
    break;
  case tokenNumber:
    successful = decodeNumber(token);
    break;
  case tokenString:
    successful = decodeString(token);
    break;
  case tokenTrue: {
    Value v(true);
    currentValue().swapPayload(v);
    currentValue().setOffsetStart(token.start_ - begin_);
    currentValue().setOffsetLimit(token.end_ - begin_);
  } break;
  case tokenFalse: {
    Value v(false);
    currentValue().swapPayload(v);
    currentValue().setOffsetStart(token.start_ - begin_);
    currentValue().setOffsetLimit(token.end_ - begin_);
  } break;
  case tokenNull: {
    Value v;
    currentValue().swapPayload(v);
    currentValue().setOffsetStart(token.start_ - begin_);
    currentValue().setOffsetLimit(token.end_ - begin_);
  } break;
  case tokenArraySeparator:
  case tokenObjectEnd:
  case tokenArrayEnd:
    if (features_.allowDroppedNullPlaceholders_) {
      // "Un-read" the current token and mark the current value as a null
      // token.
      current_--;
      Value v;
      currentValue().swapPayload(v);
      currentValue().setOffsetStart(current_ - begin_ - 1);
      currentValue().setOffsetLimit(current_ - begin_);
      break;
    } // Else, fall through...
  default:
    currentValue().setOffsetStart(token.start_ - begin_);
    currentValue().setOffsetLimit(token.end_ - begin_);
    return addError("Syntax error: value, object or array expected.", token);
  }

  if (collectComments_) {
    lastValueEnd_ = current_;
    lastValue_ = &currentValue();
  }

  return successful;
}

2.4 skipCommentTokens()

对于当前值

void Reader::skipCommentTokens(Token& token) {
  if (features_.allowComments_) {
    do {
      readToken(token);
    } while (token.type_ == tokenComment);
  } else {
    readToken(token);
  }
}

2.5 readToken(Token& token)

跟进不同字符,识别不同的数据类型;

bool Reader::readToken(Token& token) {
  skipSpaces(); // 跳过空格
  token.start_ = current_; // 当前指针的位置,  token.start_ 存储json数据起始位置
  Char c = getNextChar(); // 下一个指针
  bool ok = true;
  switch (c) { // 下一位的类型,开始标记
  case '{':
    token.type_ = tokenObjectBegin; // {  object的开始
    break;
  case '}':
    token.type_ = tokenObjectEnd; // } object 的结束
    break;
  case '[':
    token.type_ = tokenArrayBegin; // [ arrya的开始
    break;
  case ']':
    token.type_ = tokenArrayEnd; // ] array 的结束
    break;
  case '"':
    token.type_ = tokenString; //  "  字符串的开始,直接读取当前字符串
    ok = readString();
    break;
  case '/':
    token.type_ = tokenComment; // / 评论内容
    ok = readComment();
    break;
  case '0': // 说明是数字, 读取数字
  case '1':
  case '2':
  case '3':
  case '4':
  case '5':
  case '6':
  case '7':
  case '8':
  case '9':
  case '-':
    token.type_ = tokenNumber;
    readNumber();
    break;
  case 't': // true ,由于t 已经读取了,只需要匹配后面几个字符,下面几个类似
    token.type_ = tokenTrue;
    ok = match("rue", 3);
    break;
  case 'f': // false
    token.type_ = tokenFalse;
    ok = match("alse", 4);
    break;
  case 'n':
    token.type_ = tokenNull;
    ok = match("ull", 3);
    break;
  case ',': // 数组的分开标识
    token.type_ = tokenArraySeparator;
    break;
  case ':': // map 的分开标识
    token.type_ = tokenMemberSeparator;
    break;
  case 0:
    token.type_ = tokenEndOfStream;
    break;
  default:
    ok = false;
    break;
  }
  if (!ok)
    token.type_ = tokenError;
  token.end_ = current_; // 当前指针的位置 token.end_ 存储json数据起始位置
  return true;
}

2.5 readNumber()

字符串中读取数字

void Reader::readNumber() {
  const char* p = current_;
  char c = '0'; // stopgap for already consumed character
  // integral part
  while (c >= '0' && c <= '9')
    c = (current_ = p) < end_ ? *p++ : '\0';
  // fractional part
  if (c == '.') {
    c = (current_ = p) < end_ ? *p++ : '\0';
    while (c >= '0' && c <= '9')
      c = (current_ = p) < end_ ? *p++ : '\0';
  }
  // exponential part
  if (c == 'e' || c == 'E') {
    c = (current_ = p) < end_ ? *p++ : '\0';
    if (c == '+' || c == '-')
      c = (current_ = p) < end_ ? *p++ : '\0';
    while (c >= '0' && c <= '9')
      c = (current_ = p) < end_ ? *p++ : '\0';
  }
}

2.5.1 decodeNumber(Token& token)

bool OurReader::decodeNumber(Token& token) {
  Value decoded;
  if (!decodeNumber(token, decoded))
    return false;
  currentValue().swapPayload(decoded); // 更新value 
  currentValue().setOffsetStart(token.start_ - begin_);
  currentValue().setOffsetLimit(token.end_ - begin_);
  return true;
}

2.5.2 decodeNumber(Token& token, Value& decoded)

解析数字

bool OurReader::decodeNumber(Token& token, Value& decoded) {
  // Attempts to parse the number as an integer. If the number is
  // larger than the maximum supported value of an integer then
  // we decode the number as a double.
  Location current = token.start_; // 数字的起始位置
  bool isNegative = *current == '-';
  if (isNegative)
    ++current;
  // TODO: Help the compiler do the div and mod at compile time or get rid of
  // them.
  Value::LargestUInt maxIntegerValue =
      isNegative ? Value::LargestUInt(-Value::minLargestInt)
                 : Value::maxLargestUInt;
  Value::LargestUInt threshold = maxIntegerValue / 10;
  Value::LargestUInt value = 0;
  Location current = token.start_; // 数字的起始位置
  bool isNegative = *current == '-'; // 负数
  if (isNegative)
    ++current;
  // TODO: Help the compiler do the div and mod at compile time or get rid of
  // them.
  Value::LargestUInt maxIntegerValue =
      isNegative ? Value::LargestUInt(-Value::minLargestInt)
                 : Value::maxLargestUInt;
  Value::LargestUInt threshold = maxIntegerValue / 10;
  Value::LargestUInt value = 0;
  while (current < token.end_) { // 顺序移动指针, token.end_ 是数字结束的位置
    Char c = *current++;
    if (c < '0' || c > '9')
      return decodeDouble(token, decoded);
    Value::UInt digit(static_cast<Value::UInt>(c - '0'));
    if (value >= threshold) {
      // We've hit or exceeded the max value divided by 10 (rounded down). If
      // a) we've only just touched the limit, b) this is the last digit, and
      // c) it's small enough to fit in that rounding delta, we're okay.
      // Otherwise treat this number as a double to avoid overflow.
      if (value > threshold || current != token.end_ ||
          digit > maxIntegerValue % 10) {
        return decodeDouble(token, decoded);
      }
    }
    value = value * 10 + digit; // 累加数字
  }
  if (isNegative)
    decoded = -Value::LargestInt(value);
  else if (value <= Value::LargestUInt(Value::maxInt))
    decoded = Value::LargestInt(value);
  else
    decoded = value;
  return true;
}

2.6 readString()

// 读取字符串
bool Reader::readString() {
  Char c = '\0';
  while (current_ != end_) {
    c = getNextChar();
    if (c == '\\')
      getNextChar();
    else if (c == '"')
      break;
  }
  return c == '"';
}

2.6.1 decodeString

// 解析字符串
bool OurReader::decodeString(Token& token) {
  JSONCPP_STRING decoded_string;
  if (!decodeString(token, decoded_string))
    return false;
  Value decoded(decoded_string);
  currentValue().swapPayload(decoded); // 设置字符串的字
  currentValue().setOffsetStart(token.start_ - begin_);
  currentValue().setOffsetLimit(token.end_ - begin_);
  return true;
}

具体解析的函数

bool OurReader::decodeString(Token& token, JSONCPP_STRING& decoded) {
  decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2)); // 字符串的长度
  Location current = token.start_ + 1; // skip '"' 字符串其实位置
  Location end = token.end_ - 1;       // do not include '"' 字符串结束位置
  while (current != end) {
    Char c = *current++;
    if (c == '"')
      break;
    else if (c == '\\') {
      if (current == end)
        return addError("Empty escape sequence in string", token, current);
      Char escape = *current++;
      switch (escape) {
      case '"':
        decoded += '"';
        break;
      case '/':
        decoded += '/';
        break;
      case '\\':
        decoded += '\\';
        break;
      case 'b':
        decoded += '\b';
        break;
      case 'f':
        decoded += '\f';
        break;
      case 'n':
        decoded += '\n';
        break;
      case 'r':
        decoded += '\r';
        break;
      case 't':
        decoded += '\t';
        break;
      case 'u': {
        unsigned int unicode;
        if (!decodeUnicodeCodePoint(token, current, end, unicode))
          return false;
        decoded += codePointToUTF8(unicode);
      } break;
      default:
        return addError("Bad escape sequence in string", token, current);
      }
    } else {
      decoded += c;
    }
  }
  return true;
}

2.7 readArray

// 解析数组,由于数组里面的每个原始可以是 数字、object、array,需要调用readValue
bool Reader::readArray(Token& token) {
  Value init(arrayValue);
  currentValue().swapPayload(init);
  currentValue().setOffsetStart(token.start_ - begin_);
  skipSpaces();
  if (current_ != end_ && *current_ == ']') // empty array
  {
    Token endArray;
    readToken(endArray);
    return true;
  }
  int index = 0;
  for (;;) {
    Value& value = currentValue()[index++];
    nodes_.push(&value); // 栈中加入当前节点的值;
    bool ok = readValue();
    nodes_.pop();
    if (!ok) // error already set
      return recoverFromError(tokenArrayEnd);

    Token currentToken;
    // Accept Comment after last item in the array.
    ok = readToken(currentToken);
    while (currentToken.type_ == tokenComment && ok) {
      ok = readToken(currentToken);
    }
    bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
                         currentToken.type_ != tokenArrayEnd);
    if (!ok || badTokenType) {
      return addErrorAndRecover("Missing ',' or ']' in array declaration",
                                currentToken, tokenArrayEnd);
    }
    if (currentToken.type_ == tokenArrayEnd)
      break;
  }
  return true;
}

2.8 readObject

/*
* 读取一个对象
*/
bool OurReader::readObject(Token& token) {
  Token tokenName;
  JSONCPP_STRING name;
  Value init(objectValue);
  currentValue().swapPayload(init);
  currentValue().setOffsetStart(token.start_ - begin_);
  while (readToken(tokenName)) {
    // 读取key 
    bool initialTokenOk = true;
    while (tokenName.type_ == tokenComment && initialTokenOk)
      initialTokenOk = readToken(tokenName);
    if (!initialTokenOk)
      break;
    if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
      return true;
    name.clear();
    if (tokenName.type_ == tokenString) {
      if (!decodeString(tokenName, name))
        return recoverFromError(tokenObjectEnd);
    } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
      Value numberName;
      if (!decodeNumber(tokenName, numberName))
        return recoverFromError(tokenObjectEnd);
      name = numberName.asString();
    } else {
      break;
    }
    // : 是分隔标记
    Token colon;
    if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
      return addErrorAndRecover("Missing ':' after object member name", colon,
                                tokenObjectEnd);
    }
    if (name.length() >= (1U << 30))
      throwRuntimeError("keylength >= 2^30");
    if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
      JSONCPP_STRING msg = "Duplicate key: '" + name + "'";
      return addErrorAndRecover(msg, tokenName, tokenObjectEnd);
    }
    // 读取value 
    Value& value = currentValue()[name];
    nodes_.push(&value);
    bool ok = readValue();
    nodes_.pop();
    if (!ok) // error already set
      return recoverFromError(tokenObjectEnd);
    // 读取评论
    Token comma;
    if (!readToken(comma) ||
        (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
         comma.type_ != tokenComment)) {
      return addErrorAndRecover("Missing ',' or '}' in object declaration",
                                comma, tokenObjectEnd);
    }
    bool finalizeTokenOk = true;
    while (comma.type_ == tokenComment && finalizeTokenOk)
      finalizeTokenOk = readToken(comma);
    if (comma.type_ == tokenObjectEnd)
      return true;
  }
  return addErrorAndRecover("Missing '}' or object member name", tokenName,
                            tokenObjectEnd);
}

相关文章

  • json_reader.cpp

    @[TOC] 1. 代码思维导图 2. 详解 下面对各个模块讲解,为了便于理解,从最下层开始; 2.1 parse...

网友评论

      本文标题:json_reader.cpp

      本文链接:https://www.haomeiwen.com/subject/qquylqtx.html