# 2.19 实现一个简单的递归下降分析器¶

## 解决方案¶

```expr ::= expr + term
|   expr - term
|   term

term ::= term * factor
|   term / factor
|   factor

factor ::= ( expr )
|   NUM
```

```expr ::= term { (+|-) term }*

term ::= factor { (*|/) factor }*

factor ::= ( expr )
|   NUM
```

```NUM + NUM * NUM
```

```expr
expr ::= term { (+|-) term }*
expr ::= factor { (*|/) factor }* { (+|-) term }*
expr ::= NUM { (*|/) factor }* { (+|-) term }*
expr ::= NUM { (+|-) term }*
expr ::= NUM + term { (+|-) term }*
expr ::= NUM + factor { (*|/) factor }* { (+|-) term }*
expr ::= NUM + NUM { (*|/) factor}* { (+|-) term }*
expr ::= NUM + NUM * factor { (*|/) factor }* { (+|-) term }*
expr ::= NUM + NUM * NUM { (*|/) factor }* { (+|-) term }*
expr ::= NUM + NUM * NUM { (+|-) term }*
expr ::= NUM + NUM * NUM
```

```#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
Topic: 下降解析器
Desc :
"""
import re
import collections

# Token specification
NUM = r'(?P<NUM>\d+)'
PLUS = r'(?P<PLUS>\+)'
MINUS = r'(?P<MINUS>-)'
TIMES = r'(?P<TIMES>\*)'
DIVIDE = r'(?P<DIVIDE>/)'
LPAREN = r'(?P<LPAREN>\()'
RPAREN = r'(?P<RPAREN>\))'
WS = r'(?P<WS>\s+)'

master_pat = re.compile('|'.join([NUM, PLUS, MINUS, TIMES,
DIVIDE, LPAREN, RPAREN, WS]))
# Tokenizer
Token = collections.namedtuple('Token', ['type', 'value'])

def generate_tokens(text):
scanner = master_pat.scanner(text)
for m in iter(scanner.match, None):
tok = Token(m.lastgroup, m.group())
if tok.type != 'WS':
yield tok

# Parser
class ExpressionEvaluator:
'''
Implementation of a recursive descent parser. Each method
implements a single grammar rule. Use the ._accept() method
to test and accept the current lookahead token. Use the ._expect()
method to exactly match and discard the next token on on the input
(or raise a SyntaxError if it doesn't match).
'''

def parse(self, text):
self.tokens = generate_tokens(text)
self.tok = None  # Last symbol consumed
self.nexttok = None  # Next symbol tokenized
return self.expr()

self.tok, self.nexttok = self.nexttok, next(self.tokens, None)

def _accept(self, toktype):
'Test and consume the next token if it matches toktype'
if self.nexttok and self.nexttok.type == toktype:
return True
else:
return False

def _expect(self, toktype):
'Consume next token if it matches toktype or raise SyntaxError'
if not self._accept(toktype):
raise SyntaxError('Expected ' + toktype)

# Grammar rules follow
def expr(self):
"expression ::= term { ('+'|'-') term }*"
exprval = self.term()
while self._accept('PLUS') or self._accept('MINUS'):
op = self.tok.type
right = self.term()
if op == 'PLUS':
exprval += right
elif op == 'MINUS':
exprval -= right
return exprval

def term(self):
"term ::= factor { ('*'|'/') factor }*"
termval = self.factor()
while self._accept('TIMES') or self._accept('DIVIDE'):
op = self.tok.type
right = self.factor()
if op == 'TIMES':
termval *= right
elif op == 'DIVIDE':
termval /= right
return termval

def factor(self):
"factor ::= NUM | ( expr )"
if self._accept('NUM'):
return int(self.tok.value)
elif self._accept('LPAREN'):
exprval = self.expr()
self._expect('RPAREN')
return exprval
else:
raise SyntaxError('Expected NUMBER or LPAREN')

def descent_parser():
e = ExpressionEvaluator()
print(e.parse('2'))
print(e.parse('2 + 3'))
print(e.parse('2 + 3 * 4'))
print(e.parse('2 + (3 + 4) * 5'))
# print(e.parse('2 + (3 + * 4)'))
# Traceback (most recent call last):
#    File "<stdin>", line 1, in <module>
#    File "exprparse.py", line 40, in parse
#    return self.expr()
#    File "exprparse.py", line 67, in expr
#    right = self.term()
#    File "exprparse.py", line 77, in term
#    termval = self.factor()
#    File "exprparse.py", line 93, in factor
#    exprval = self.expr()
#    File "exprparse.py", line 67, in expr
#    right = self.term()
#    File "exprparse.py", line 77, in term
#    termval = self.factor()
#    File "exprparse.py", line 97, in factor
#    raise SyntaxError("Expected NUMBER or LPAREN")
#    SyntaxError: Expected NUMBER or LPAREN

if __name__ == '__main__':
descent_parser()
```

## 讨论¶

```expr ::= term { ('+'|'-') term }*

term ::= factor { ('*'|'/') factor }*

factor ::= '(' expr ')'
| NUM
```

```class ExpressionEvaluator:
...
def expr(self):
...
def term(self):
...
def factor(self):
...
```

• 如果规则中的下个符号是另外一个语法规则的名字(比如term或factor)，就简单的调用同名的方法即可。 这就是该算法中”下降”的由来 - 控制下降到另一个语法规则中去。 有时候规则会调用已经执行的方法(比如，在 `factor ::= '('expr ')'` 中对expr的调用)。 这就是算法中”递归”的由来。

• 如果规则中下一个符号是个特殊符号(比如()，你得查找下一个令牌并确认是一个精确匹配)。 如果不匹配，就产生一个语法错误。这一节中的 `_expect()` 方法就是用来做这一步的。

• 如果规则中下一个符号为一些可能的选择项(比如 + 或 -)， 你必须对每一种可能情况检查下一个令牌，只有当它匹配一个的时候才能继续。 这也是本节示例中 `_accept()` 方法的目的。 它相当于_expect()方法的弱化版本，因为如果一个匹配找到了它会继续， 但是如果没找到，它不会产生错误而是回滚(允许后续的检查继续进行)。

• 对于有重复部分的规则(比如在规则表达式 `::= term { ('+'|'-') term }*` 中)， 重复动作通过一个while循环来实现。 循环主体会收集或处理所有的重复元素直到没有其他元素可以找到。

• 一旦整个语法规则处理完成，每个方法会返回某种结果给调用者。 这就是在解析过程中值是怎样累加的原理。 比如，在表达式求值程序中，返回值代表表达式解析后的部分结果。 最后所有值会在最顶层的语法规则方法中合并起来。

```items ::= items ',' item
| item
```

```def items(self):
itemsval = self.items()
if itemsval and self._accept(','):
itemsval.append(self.item())
else:
itemsval = [ self.item() ]
```

```expr ::= factor { ('+'|'-'|'*'|'/') factor }*

factor ::= '(' expression ')'
| NUM
```

```from ply.lex import lex
from ply.yacc import yacc

# Token list
tokens = [ 'NUM', 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'LPAREN', 'RPAREN' ]
# Ignored characters
t_ignore = ' \t\n'
# Token specifications (as regexs)
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_LPAREN = r'\('
t_RPAREN = r'\)'

# Token processing functions
def t_NUM(t):
r'\d+'
t.value = int(t.value)
return t

# Error handler
def t_error(t):
t.skip(1)

# Build the lexer
lexer = lex()

# Grammar rules and handler functions
def p_expr(p):
'''
expr : expr PLUS term
| expr MINUS term
'''
if p[2] == '+':
p[0] = p[1] + p[3]
elif p[2] == '-':
p[0] = p[1] - p[3]

def p_expr_term(p):
'''
expr : term
'''
p[0] = p[1]

def p_term(p):
'''
term : term TIMES factor
| term DIVIDE factor
'''
if p[2] == '*':
p[0] = p[1] * p[3]
elif p[2] == '/':
p[0] = p[1] / p[3]

def p_term_factor(p):
'''
term : factor
'''
p[0] = p[1]

def p_factor(p):
'''
factor : NUM
'''
p[0] = p[1]

def p_factor_group(p):
'''
factor : LPAREN expr RPAREN
'''
p[0] = p[2]

def p_error(p):
print('Syntax error')

parser = yacc()
```

```>>> parser.parse('2')
2
>>> parser.parse('2+3')
5
>>> parser.parse('2+(3+4)*5')
37
>>>
```