c# – Antlr错误策略跳过令牌,直到规则再次匹配

2024年1月11日 166次阅读

我试过这个解决方案，但它似乎对我不起作用。

这是我的语法的摘录：

module
    : BEGIN MODULE IDENT STRING module_element* END MODULE
    ;

module_element
    : element_1 | element_2 | element_3 | ...
    ;

每个元素下面都有一棵更大的子树。现在，当发生 RecognitionException 时，我想要消耗（跳过）词法符号（token），直到下一个 module_element 能够匹配，或者父规则的 END MODULE 能够匹配为止。

有关如何在继承自DefaultErrorStrategy的类中执行此操作的任何提示？

编辑：这是一个MCVE：

Program.cs：

namespace AntlrExample
{
    /// <summary>
    /// Console entry point that runs the generated MyGrammar lexer and parser over a sample file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Parses the hard-coded sample file and prints any exception that escapes the parse.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            const string fileToParse = @"C:\temp\MyGrammarExample.txt";

            try
            {
                Parse(fileToParse);
            }
            catch (Exception ex)
            {
                Console.WriteLine("Exception: " + ex);
            }
        }

        /// <summary>
        /// Builds the lexer/parser pipeline for <paramref name="filePath"/>, attaches the
        /// parse listener and invokes the <c>startnode</c> rule.
        /// </summary>
        /// <param name="filePath">Path of the file to parse.</param>
        private static void Parse(string filePath)
        {
            var charStream = new AntlrFileStream(filePath, Encoding.Default);
            var lexer = new MyGrammarLexer(charStream);
            var tokens = new CommonTokenStream(lexer);
            var parser = new MyGrammarParser(tokens);

            // The listener is registered as a parse listener, so it is notified while parsing runs.
            parser.AddParseListener(new MyGrammarListener());

            parser.startnode();
        }
    }
}

MyGrammar.g4：

// Combined lexer/parser grammar for the example module file format.
grammar MyGrammar;

// Declares EOF in the generated parser as an alias of Eof.
@parser::members
{
    protected const int EOF = Eof;
}

// Declares EOF and HIDDEN in the generated lexer as aliases of Eof and Hidden.
@lexer::members
{
    protected const int EOF = Eof;
    protected const int HIDDEN = Hidden;
}

// Entry rule: exactly one module.
startnode
    :   module
    ;

// BEGIN MODULE <name> <description>, zero or more elements, then END MODULE.
module
    : BEGIN MODULE IDENT STRING module_element* END MODULE
    ;

// One of the supported element kinds.
module_element
    :   element_1 | element_2
    ;

// ELEMENT1: name, description and two identifiers, wrapped in BEGIN ELEMENT1 ... END ELEMENT1.
element_1
    :   BEGIN ELEMENT1 name=IDENT desc=STRING other1=IDENT other2=IDENT END ELEMENT1
    ;

// ELEMENT2: like ELEMENT1, plus a trailing integer (other3).
element_2
    :   BEGIN ELEMENT2 name=IDENT desc=STRING other1=IDENT other2=IDENT other3=INT END ELEMENT2
    ;

// Keywords. They are declared before IDENT, which could otherwise match the same text.
BEGIN : 'BEGIN';
MODULE: 'MODULE';
END: 'END';
ELEMENT1 : 'ELEMENT1';
ELEMENT2 : 'ELEMENT2';

// Identifier: starts with a LETTER, followed by letters, digits, '[', ']' or '.'.
IDENT 
    : LETTER (LETTER|'0'..'9'|'['|']'|'.')* 
    ;

// ASCII letter or underscore.
fragment LETTER 
    : 'A'..'Z' | 'a'..'z' | '_' 
    ;

// Double-quoted string. Inside the quotes a backslash escapes the following character,
// and two consecutive double quotes are accepted as well.
STRING
    : '"' ('\\' (.) | '"''"' | ~( '\\' | '"'))* '"'
    ;

// Integer: optional minus sign followed by one or more digits.
INT
    : MINUS? DIGIT+
    ;

fragment MINUS
    : '-'
    ;

// NOTE(review): unlike LETTER and MINUS, DIGIT is not declared as a fragment, so it is
// also a token type of its own. Confirm whether that is intended.
DIGIT
    : '0'..'9'
    ;

// Whitespace is dropped by the lexer.
WS
    : ( ' ' | '\t' | '\r' | '\n')+ -> skip
    ;

MyGrammarListener.cs

namespace AntlrExample.Parser
{
    /// <summary>
    /// Listener that prints a line to the console for every completely parsed
    /// ELEMENT1 or ELEMENT2.
    /// </summary>
    public class MyGrammarListener : MyGrammarBaseListener
    {
        /// <summary>
        /// Prints the name, description and both identifiers of an ELEMENT1.
        /// </summary>
        /// <param name="context">Context of the <c>element_1</c> rule that is being exited.</param>
        public override void ExitElement_1(MyGrammarParser.Element_1Context context)
        {
            // A labelled token stays null when the rule stopped before reaching it (for
            // example after a syntax error). Skip such elements instead of throwing a
            // NullReferenceException from inside the parse.
            if (context.name == null || context.desc == null || context.other1 == null || context.other2 == null)
            {
                return;
            }

            Console.WriteLine(string.Format("Just parsed an ELEMENT1: {0} {1} {2} {3}", context.name.Text, context.desc.Text, context.other1.Text, context.other2.Text));
        }

        /// <summary>
        /// Prints the name, description, both identifiers and the integer of an ELEMENT2.
        /// </summary>
        /// <param name="context">Context of the <c>element_2</c> rule that is being exited.</param>
        public override void ExitElement_2(MyGrammarParser.Element_2Context context)
        {
            // Same guard as for ELEMENT1: an element that failed to parse (e.g. a missing
            // INT for other3) has null labels and must not be reported as parsed.
            if (context.name == null || context.desc == null || context.other1 == null || context.other2 == null || context.other3 == null)
            {
                return;
            }

            Console.WriteLine(string.Format("Just parsed an ELEMENT2: {0} {1} {2} {3} {4}", context.name.Text, context.desc.Text, context.other1.Text, context.other2.Text, context.other3.Text));
        }
    }
}

MyGrammarExample.txt

BEGIN MODULE MyModule "This is the main module"

    BEGIN ELEMENT1 MyElement1 "This is the first element"
        Something
        Anything
    END ELEMENT1

    BEGIN ELEMENT1 MyElement2 "This is the second element"
        SomethingMore
        AnythingMore
    END ELEMENT1

    BEGIN ELEMENT2 MyFirstElement2 "This one will fail"
        Foo
        Bar
        HereShouldBeAnInt
    END ELEMENT2

    BEGIN ELEMENT2 MySecondElement2 "This one should parse even though the parser failed to parse the one before"
        RealFoo
        RealBar
        34
    END ELEMENT2

END MODULE

最佳答案：您应该能够使用下面这个错误策略类来完成此操作：

/// <summary>
/// Error strategy that runs the default recovery and then, if the parser was left on an
/// END token, steps over it and keeps consuming tokens until one from the error recovery
/// set is reached.
/// </summary>
internal class MyGrammarErrorStrategy : DefaultErrorStrategy
{
    /// <summary>
    /// Recovers from <paramref name="e"/> by resynchronising with the default strategy and
    /// then skipping past the END token the parser stopped on, if any.
    /// </summary>
    /// <param name="recognizer">The parser that reported the error.</param>
    /// <param name="e">The recognition error being recovered from.</param>
    public override void Recover(Parser recognizer, RecognitionException e)
    {
        // Let the default implementation resynchronise first; for this grammar it is
        // expected to stop at the next 'END' token.
        base.Recover(recognizer, e);

        var tokens = (ITokenStream)recognizer.InputStream;

        // Only continue when the default recovery really left us on 'END'.
        if (tokens.La(1) != MyGrammarParser.END)
        {
            return;
        }

        // Fetch the set of tokens we may resume at before stepping over 'END'.
        IntervalSet recoverySet = GetErrorRecoverySet(recognizer);

        // Step over the 'END' token itself.
        tokens.Consume();

        // Skip ahead to the next token in the recovery set.
        // If the failed element was the last one, this is expected to be the 'END' of 'END MODULE';
        // otherwise it is expected to be the 'BEGIN' that starts the next module_element.
        ConsumeUntil(recognizer, recoverySet);
    }
}

然后相应地设置错误处理程序：

// Install the custom strategy on the parser; do this before invoking the start rule.
parser.ErrorHandler = new MyGrammarErrorStrategy();

思路是：首先让默认的 Recover 实现把当前位置移动到“重新同步集”中的词法符号上，在本例中就是下一个 END 词法符号。随后，我们利用错误恢复集继续消耗后面的词法符号，把位置移动到我们需要的地方。最终停下的位置取决于出错的 module_element 是否是模块中的最后一个元素。