美文网首页
手写javascript json解析器

手写javascript json解析器

作者: qiuxiaojie | 来源:发表于2019-01-12 13:41 被阅读0次

    JSON是一种通用的数据格式,语法较为简单,手写一个JSON解析器是入门编译原理的一种非常好的方法。

    词法分析

    将输入的字符串切割成一个一个的token

    /**
     * Token type identifiers produced by the lexer and matched by the parser.
     *
     * For punctuation and the three keywords the identifier is the literal text
     * itself, so a Token built without an explicit lexeme can reuse its type as
     * its string. The table is frozen so that no consumer can accidentally
     * reassign a shared token type at runtime.
     */
    const TokenTypes = Object.freeze({
      OPEN_OBJECT: '{',
      CLOSE_OBJECT: '}',
      OPEN_ARRAY: '[',
      CLOSE_ARRAY: ']',
      // Kept for backward compatibility; object keys are lexed as STRING tokens.
      KEY: 'key',
      STRING: 'string',
      NUMBER: 'number',
      TRUE: 'true',
      FALSE: 'false',
      NULL: 'null',
      COLON: ':',
      COMMA: ',',
      // Emitted once the whole input has been consumed.
      EOF: 'eof'
    });
    
    /**
     * A single lexical token: its type plus the text (lexeme) it carries.
     */
    class Token {
      /**
       * @param {string} type - The token type identifier.
       * @param {string} [string] - The lexeme. When omitted (undefined or null)
       *   it defaults to `type`, which suits punctuation and keyword tokens
       *   whose type is their literal text.
       */
      constructor(type, string) {
        this.type = type;
        // Only fall back when no lexeme was supplied. A truthiness test would
        // also replace a legitimate empty-string lexeme (the JSON value "")
        // with the type name.
        this.string = string === undefined || string === null ? type : string;
      }
    }
    
    /**
     * Splits a JSON text into Token objects, handed out one at a time by
     * nextToken().
     *
     * Supported lexemes: the structural characters `{ } [ ] : ,`, double-quoted
     * strings with the standard JSON escapes, integers with an optional leading
     * minus sign, and the keywords `true`, `false` and `null`. Fractions and
     * exponents are not supported and are rejected with an error.
     */
    class Lexer {
      /**
       * @param {string} json - The JSON text to tokenize.
       */
      constructor(json) {
        this._json = json;
        this._index = 0;
      }

      /**
       * @returns {boolean} True once every character has been consumed.
       */
      _isEnd() {
        return this._index >= this._json.length;
      }

      /**
       * Consumes one character.
       * @returns {string|undefined} The consumed character, or undefined at end
       *   of input.
       */
      _walk() {
        return this._json[this._index++];
      }

      /**
       * @returns {string|undefined} The character at the read position, without
       *   consuming it.
       */
      _currentChar() {
        return this._json[this._index];
      }

      /**
       * Alias of _currentChar(), kept under its original (misspelled) name.
       * @returns {string|undefined}
       */
      _currentChat() {
        return this._currentChar();
      }

      /**
       * @returns {string|undefined} The character one past the read position.
       */
      _nextChar() {
        return this._json[this._index + 1];
      }

      /**
       * Advances the read position past spaces, tabs, line feeds and carriage
       * returns (the four JSON whitespace characters).
       */
      _skipWhitespace() {
        while (!this._isEnd()) {
          const c = this._currentChar();
          if (c !== ' ' && c !== '\t' && c !== '\n' && c !== '\r') break;
          this._index++;
        }
      }

      /**
       * Reads the remainder of a string literal; the opening quote has already
       * been consumed by the caller.
       * @returns {Token} A STRING token holding the decoded contents.
       * @throws {Error} If the input ends before the closing quote or contains
       *   an invalid escape sequence.
       */
      _readString() {
        let tmp = '';
        while (!this._isEnd()) {
          const c = this._walk();
          if (c === '"') return new Token(TokenTypes.STRING, tmp);
          tmp += c === '\\' ? this._readEscape() : c;
        }
        throw new Error('unterminated string');
      }

      /**
       * Decodes one escape sequence; the backslash has already been consumed.
       * @returns {string} The character the escape stands for.
       * @throws {Error} If the escape is unknown, malformed, or cut off by the
       *   end of input.
       */
      _readEscape() {
        if (this._isEnd()) throw new Error('unterminated string');
        const c = this._walk();
        switch (c) {
          case '"':
            return '"';
          case '\\':
            return '\\';
          case '/':
            return '/';
          case 'b':
            return '\b';
          case 'f':
            return '\f';
          case 'n':
            return '\n';
          case 'r':
            return '\r';
          case 't':
            return '\t';
          case 'u': {
            const hex = this._json.slice(this._index, this._index + 4);
            if (!/^[0-9a-fA-F]{4}$/.test(hex)) {
              throw new Error(`invalid unicode escape \\u${hex}`);
            }
            this._index += 4;
            // One UTF-16 code unit per escape; surrogate pairs arrive as two
            // consecutive escapes and recombine when concatenated.
            return String.fromCharCode(parseInt(hex, 16));
          }
        }
        throw new Error(`invalid escape \\${c}`);
      }

      /**
       * @param {string|undefined} c
       * @returns {boolean} True if c is an ASCII letter.
       */
      _isLetter(c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
      }

      /**
       * @param {string|undefined} c
       * @returns {boolean} True if c is an ASCII digit.
       */
      _isDigit(c) {
        return c >= '0' && c <= '9';
      }

      /**
       * Reads an integer literal.
       * @param {string} c - The already consumed first character: a digit or a
       *   minus sign.
       * @returns {Token} A NUMBER token holding the literal text.
       * @throws {Error} If a minus sign is not followed by a digit.
       */
      _readDigit(c) {
        if (c === '-' && !this._isDigit(this._currentChar())) {
          throw new Error(`not supported ${c}`);
        }
        let tmp = c;
        // _currentChar() is undefined at end of input, which is not a digit,
        // so the loop stops there without an explicit end check.
        while (this._isDigit(this._currentChar())) {
          tmp += this._walk();
        }
        return new Token(TokenTypes.NUMBER, tmp);
      }

      /**
       * Reads a run of letters.
       * @param {string} c - The already consumed first letter.
       * @returns {string} The whole word.
       */
      _readWord(c) {
        let tmp = c;
        while (this._isLetter(this._currentChar())) {
          tmp += this._walk();
        }
        return tmp;
      }

      /**
       * Produces the next token, skipping any leading whitespace.
       * @returns {Token} The next token; an EOF token once the input is
       *   exhausted (repeated calls keep returning EOF).
       * @throws {Error} On a character or word that is not part of the
       *   supported JSON subset, or on a malformed string.
       */
      nextToken() {
        // Whitespace is skipped in a loop rather than by recursing once per
        // character, so long runs of padding cannot exhaust the call stack.
        this._skipWhitespace();
        if (this._isEnd()) return new Token(TokenTypes.EOF);
        const c = this._walk();
        switch (c) {
          case '{':
            return new Token(TokenTypes.OPEN_OBJECT);
          case '}':
            return new Token(TokenTypes.CLOSE_OBJECT);
          case '[':
            return new Token(TokenTypes.OPEN_ARRAY);
          case ']':
            return new Token(TokenTypes.CLOSE_ARRAY);
          case ':':
            return new Token(TokenTypes.COLON);
          case ',':
            return new Token(TokenTypes.COMMA);
          case '"':
            return this._readString();
        }
        if (c === '-' || this._isDigit(c)) return this._readDigit(c);
        if (this._isLetter(c)) {
          const word = this._readWord(c);
          switch (word) {
            case 'true':
              return new Token(TokenTypes.TRUE);
            case 'false':
              return new Token(TokenTypes.FALSE);
            case 'null':
              return new Token(TokenTypes.NULL);
          }
          throw new Error(`expect true, false, null actual ${word}`);
        }
        throw new Error(`not supported ${c}`);
      }
    }
    

    解析器

    解析器根据匹配到的token类型跳转到不同的状态,一步一步完成解析。

    /**
     * Recursive-descent JSON parser driven by the tokens of a Lexer.
     *
     * It always holds exactly one look-ahead token in `this._token`; every
     * `_visit*` method is entered with the look-ahead on the first token of the
     * construct it parses and leaves it on the first token after that construct.
     */
    class Parser {
      /**
       * @param {string} json - The JSON text to parse.
       */
      constructor(json) {
        this._lexer = new Lexer(json);
        this._token = this._lexer.nextToken();
      }

      /**
       * Requires the look-ahead token to be of the given type and consumes it.
       * @param {string} type - The expected token type.
       * @returns {string} The lexeme of the consumed token.
       * @throws {Error} If the look-ahead token has a different type.
       */
      _matchToken(type) {
        if (!this._isToken(type)) {
          throw new Error(`expect ${type} actual ${this._token.type}`);
        }
        const string = this._token.string;
        this._walk();
        return string;
      }

      /**
       * @param {string} type - A token type.
       * @returns {boolean} True if the look-ahead token is of that type.
       */
      _isToken(type) {
        return this._token.type === type;
      }

      /**
       * @returns {string} The lexeme of the look-ahead token.
       */
      _currentString() {
        return this._token.string;
      }

      /**
       * Advances the look-ahead to the next token of the lexer.
       */
      _walk() {
        this._token = this._lexer.nextToken();
      }

      /**
       * Parses the whole input as a single JSON value.
       * @returns {*} The parsed value (object, array, string, number, boolean
       *   or null).
       * @throws {Error} If the input is not a valid value or if anything
       *   follows the value.
       */
      parse() {
        const value = this._visitValue();
        // A complete document is exactly one value; leftover tokens mean the
        // input is malformed rather than something to ignore silently.
        if (!this._isToken(TokenTypes.EOF)) {
          throw new Error(`expect ${TokenTypes.EOF} actual ${this._token.type}`);
        }
        return value;
      }

      /**
       * Parses one JSON value starting at the look-ahead token.
       * @returns {*} The parsed value.
       * @throws {Error} If the look-ahead token cannot start a value.
       */
      _visitValue() {
        const { type, string } = this._token;
        switch (type) {
          case TokenTypes.NUMBER:
            this._walk();
            // Number() converts the complete literal in base 10.
            return Number(string);
          case TokenTypes.STRING:
            this._walk();
            return string;
          case TokenTypes.TRUE:
            this._walk();
            return true;
          case TokenTypes.FALSE:
            this._walk();
            return false;
          case TokenTypes.NULL:
            this._walk();
            return null;
          case TokenTypes.OPEN_OBJECT: {
            this._walk();
            const object = this._visitObject();
            this._matchToken(TokenTypes.CLOSE_OBJECT);
            return object;
          }
          case TokenTypes.OPEN_ARRAY: {
            this._walk();
            const array = this._visitArray();
            this._matchToken(TokenTypes.CLOSE_ARRAY);
            return array;
          }
          default:
            // Returning undefined here would let inputs such as "[1,]" or an
            // empty document produce a value instead of an error.
            throw new Error(`unexpected token ${type}`);
        }
      }

      /**
       * Parses the members of an object; the opening brace has already been
       * consumed and the closing brace is left for the caller to match.
       * @returns {Object} The parsed object.
       * @throws {Error} On a malformed member list.
       */
      _visitObject() {
        const object = {};
        // Empty object: nothing to read before the closing brace.
        if (this._isToken(TokenTypes.CLOSE_OBJECT)) return object;
        while (true) {
          const key = this._matchToken(TokenTypes.STRING);
          this._matchToken(TokenTypes.COLON);
          const value = this._visitValue();
          // Define an own data property so that a key such as "__proto__" is
          // stored like any other key instead of hitting the prototype setter.
          Object.defineProperty(object, key, {
            value,
            writable: true,
            enumerable: true,
            configurable: true
          });
          if (!this._isToken(TokenTypes.COMMA)) break;
          this._walk();
        }
        return object;
      }

      /**
       * Parses the elements of an array; the opening bracket has already been
       * consumed and the closing bracket is left for the caller to match.
       * @returns {Array} The parsed array.
       * @throws {Error} On a malformed element list.
       */
      _visitArray() {
        const array = [];
        // Empty array: nothing to read before the closing bracket.
        if (this._isToken(TokenTypes.CLOSE_ARRAY)) return array;
        while (true) {
          array.push(this._visitValue());
          if (!this._isToken(TokenTypes.COMMA)) break;
          this._walk();
        }
        return array;
      }
    }
    
    // Round-trip demo: serialise a sample value with the built-in
    // JSON.stringify, then feed that text to the hand-written Parser and print
    // both sides for comparison.
    const array = [
      {
        t: true,
        f: false,
        n: null,
        num: 12,
        s: 'str'
      }
    ];
    const json = JSON.stringify(array);
    console.log(json); // [{"t":true,"f":false,"n":null,"num":12,"s":"str"}]
    const parser = new Parser(json);
    const result = parser.parse();
    console.log(result); // [ { t: true, f: false, n: null, num: 12, s: 'str' } ]
    

    简单调用

    // Minimal usage example: parse a JSON text and print the result.
    // The example lives in its own block scope so that its `json`, `parser`
    // and `result` bindings cannot collide with same-named top-level `const`
    // declarations elsewhere in the file (a redeclaration is a SyntaxError).
    {
      const json = `[{"f": false, "t": true, "s": "string", "n": null, "num": 12}]`;
      const parser = new Parser(json);
      const result = parser.parse();
      // Pretty-print with a 2-space indent so the console shows the parsed
      // value as formatted JSON rather than the runtime's object inspection.
      console.log(JSON.stringify(result, null, 2));
    }
    

    最终打印

    [
      {
        "f": false,
        "t": true,
        "s": "string",
        "n": null,
        "num": 12
      }
    ]
    

    github gist 完整代码

    相关文章

      网友评论

          本文标题:手写javascript json解析器

          本文链接:https://www.haomeiwen.com/subject/fobqdqtx.html