美文网首页
手写一个语法分析器

手写一个语法分析器

作者: 镜月花水 | 来源:发表于2022-03-19 23:58 被阅读0次

    语法分析

    先实现一个简单的语法分析,用BNF表示如下:

    expression -> equality;
    equality   -> comparison ( ( "!=" | "==" ) comparsion )*;
    comparison -> term ( (">" | ">=" | "<" | "<=") term)*;
    term       -> factor ( ("-" | "+") factor)*;
    factor     -> unary (( "/" | "*") unary)*;
    unary      -> ( "!" | "-") unary | primary;   
    primary    -> NUMBER | STRING | "true" | "false" | "nil" | "(" expression ")";
    

    我们用递归下降解析来实现,最后用语法树表示。
    参考维基百科中递归下降解析器的说明, 递归下降是一种自上而下的解析器,由一组相互递归的程序(或等价的非递归程序)构建而成,其中每个程序都实现了文法中的一个非终结符。因此,这些程序的结构密切反映了它所识别的文法结构。
    例如文法

            S->cAd
            A->ab|a
    

    用下面的方式来解析:

    class compilerEngile {
        constructor(input) {
            // ...
        }
        
        compilerS() {
            // ...
        }
    
        compilerA() {
            // ...
        }
    
        run() {
            this.compilerS(); // Start !!!
        }
    }
    

    语法解析

    语法解析结果我们用语法树表示。 通过上述的BNF可以看到,这里存在递归引用。我们用Expr类作为基类表示。 其他的都是Expr的子类。如下图所示:

    class Expr {
        static class Binary extends Expr {
            Binary(Expr left, Token operator, Expr right) {
                this.left = left;
                this.operator = operator;
                this.right = right;
            }
    
            final Expr left;
            final Token operator;
            final Expr right;
        }
        
        static class Literal extends Expr {
            Literal(Object value) {
                this.value = value;
            }
    
            final Object value;
        }
    }
    

    然后我们对词法解析完的token list进行解析,按照上述BNF解析完后,得到一个以Expr为root节点的语法树。其中的节点是Expr的个子类。

    解析

    public class Parser {
        private List<Token> tokens;
        private int position = 0;
    
        public Expr parse(List<Token> tokens) {
            this.tokens = tokens;
            return expression();
        }
    
        private Expr expression() {
            return equality();
        }
    
        private Expr equality() { // != == 都是左运算符
            Expr expr = comparison();
            while (match(TokenType.BANG_EQUAL, TokenType.EQUAL_EQUAL)) {
                Token operator = previous();
                Expr right = comparison(); 
                expr = new Expr.Binary(expr, operator, right);
            }
            return expr;
        }
    
        private Expr comparison() {
            Expr expr = term();
            while (match(TokenType.GREATER, TokenType.GREATER_EQUAL, TokenType.LESS, TokenType.LESS_EQUAL)) {
                Token operator = previous();
                Expr right = term();
                expr = new Expr.Binary(expr, operator, right);
            }
            return expr;
        }
    
        private Expr term() {
            Expr expr = factor();
            while (match(TokenType.MINUS, TokenType.PLUS)) {
                Token operator = previous();
                Expr right = factor();
                expr = new Expr.Binary(expr, operator, right);
            }
            return expr;
        }
    
        private Expr factor() {
            Expr expr = unary();
            while (match(TokenType.SLASH, TokenType.STAR)) {
                Token operator = previous();
                Expr right = unary();
                expr = new Expr.Binary(expr, operator, right);
            }
            return expr;
        }
    
        private Expr unary() {
            if (match(TokenType.BANG, TokenType.MINUS)) {
                Token operator = previous();
                Expr right = unary();
                return new Expr.Unary(operator, right);
            }
            return primary();
        }
    
        private Expr primary() {
            Token cToken = current();
            System.out.print(String.format("Current Token %s, position %d", cToken, this.position));
            if (match(TokenType.NUMBER)) {
                Token token = previous();
                return new Expr.Literal(token.value);
            }
            if (match(TokenType.STRING)) {
                Token token = previous();
                return new Expr.Literal(token.value);
            }
            if (match(TokenType.TRUE)) {
                return new Expr.Literal(true);
            }
            
            if (match(TokenType.FALSE)) {
                return new Expr.Literal(false);
            }
    
            if (match(TokenType.NIL)) {
                return new Expr.Literal(null);
            }
    
            if (match(TokenType.LEFT_PAREN)) {
                Expr expr = expression();
                consume(TokenType.RIGHT_PAREN, "Expect ')' ");
                return new Expr.Grouping(expr);
            }
    
            throw new Error("Parse error");
        }
    
        private boolean match(TokenType ...types) {
            Token token = current();
            for (TokenType type: types) {
                if (token.tokenType == type) {
                    this.advance();
                    return true;
                }
            }
            return false;
        }
    
        private Token current() {
            return this.tokens.get(this.position);
        }
    
        private void advance() {
            if (!isEnd()) {
                this.position ++;
            }
        }
    
        private Token previous() {
            return this.tokens.get(this.position - 1);
        }
        
        private void consume(TokenType tokenType, String errmsg) {
            if (!match(tokenType)) {
                Runner.error(errmsg);
            }
        }
    
        private boolean isEnd() {
            return current().tokenType == TokenType.EOF;
        }
    
    }
    

    Expr的定义如下

    abstract class Expr {
        static class Binary extends Expr {
            Binary(Expr left, Token operator, Expr right) {
                this.left = left;
                this.operator = operator;
                this.right = right;
            }
    
            final Expr left;
            final Token operator;
            final Expr right;
    
        }
    
        static class Unary extends Expr {
            Unary(Token operator, Expr unary) {
                this.operator = operator;
                this.unary = unary;
            }
    
            final Token operator;
            final Expr unary;
    
        }
    
        static class Literal extends Expr  {
            Literal(Object value) {
                this.value = value;
            }
    
            final Object value;
    
        }
    
        static class Grouping extends Expr {
            Grouping(Expr expr) {
                this.expr = expr;
            }
            final Expr expr;
    
        }
        
    }
    

    这个时候可以开始解析
    调用Parse.parse(tokens),最终会返回一个以Expr为root的语法树。这里,为了方便查看,我们把语法树输出来,这个就涉及到对语法树的遍历处理。一般用visitor模式来遍历处理。 这里用visitor模式,不是因为visitor的名字暗示的这样,方便查看遍历,而是对AST的处理,有很多中,比方说,打印,检查类型, 执行等。用visitor模式,可以再不修改Expr类的情况下,只扩展新的操作类就可以。
    我们把原来的Expr改成如下所示:

    abstract class Expr {
    
        interface Visitor<R> {
            R visitBinaryExpr(Binary expr);
            R visitUnaryExpr(Unary expr);
            R visitLiteralExpr(Literal expr);
            R visitGroupingExpr(Grouping expr);
        }
    
        abstract <R> R accept(Visitor<R> visitor);
    
        static class Binary extends Expr {
            Binary(Expr left, Token operator, Expr right) {
                this.left = left;
                this.operator = operator;
                this.right = right;
            }
    
            final Expr left;
            final Token operator;
            final Expr right;
    
            @Override
            <R> R accept(Visitor<R> visitor) {
                return visitor.visitBinaryExpr(this);
            }
        }
    
        static class Unary extends Expr {
            Unary(Token operator, Expr unary) {
                this.operator = operator;
                this.unary = unary;
            }
    
            final Token operator;
            final Expr unary;
    
            @Override
            <R> R accept(Visitor<R> visitor) {
                return visitor.visitUnaryExpr(this);
            }
        }
    
        static class Literal extends Expr  {
            Literal(Object value) {
                this.value = value;
            }
    
            final Object value;
    
            @Override
            <R> R accept(Visitor<R> visitor) {
                return visitor.visitLiteralExpr(this);
            }
        }
    
        static class Grouping extends Expr {
            Grouping(Expr expr) {
                this.expr = expr;
            }
            final Expr expr;
    
            @Override
            <R> R accept(Visitor<R> visitor) {
                return visitor.visitGroupingExpr(this);
            }
        }
    }
    
    

    创建ASTprinter类, 按照想要的输出格式,处理每个节点

    public class AstPrinter implements Expr.Visitor<String>{
    
        String print(Expr expr) {
            return expr.accept(this);
        }
    
        @Override
        public String visitBinaryExpr(Binary expr) {
            return parenthesize(expr.operator.name, expr.left, expr.right);
        }
    
        @Override
        public String visitUnaryExpr(Unary expr) {
            return parenthesize(expr.operator.name, expr.unary);
        }
    
        @Override
        public String visitLiteralExpr(Literal expr) {
            if (expr.value == null) return "nil";
            return expr.value.toString();
        }
    
        @Override
        public String visitGroupingExpr(Grouping expr) {
            return parenthesize("group", expr.expr);
        }
    
        private String parenthesize(String name, Expr ...exprs) {
            StringBuilder builder = new StringBuilder();
    
            builder.append("(").append(name);
            for (Expr expr: exprs) {
                builder.append(" ");
                builder.append(expr.accept(this));
            }
            builder.append(")");
            return builder.toString();
    
        }
    }
    

    从文件输入测试语法,试试

            String text = readTextFile();
            Scanner scanner  = new Scanner();
            List<Token> tokenlist = scanner.scan(text);
    
            Parser parser = new Parser();
            Expr expr = parser.parse(tokenlist);
    
            AstPrinter printer = new AstPrinter();
            String printResult = printer.print(expr);
            System.out.println("Result " + printResult);
    

    最终输出

    (+ (* 3.0 5.0) 23.0)
    

    相关文章

      网友评论

          本文标题:手写一个语法分析器

          本文链接:https://www.haomeiwen.com/subject/ukcodrtx.html