极大极小算法以及Alpha_beta剪枝
# python3
# author :李先生
# time :2018.10.28
# email :[email protected]
class TicTacToe(object):
'''
井字棋游戏:
player:当前落子玩家,-1代表AI,1代表人类
board:棋盘
'''
# 传入谁先手,不传入默认AI先手
def __init__(self,depth,first = -1):
self.depth = depth
self.restart(first)
# 重新开始游戏,player先手
def restart(self,player):
self.board = [0]*9
self.player = player
self.bestmove = -1
# 落子函数
def go(self,pos):
self.board[pos]=self.player
self.player = -self.player
# 撤销落子函数
def cancel_go(self,pos):
self.board[pos]=0
self.player = -self.player
def print_board(self):
print("-"*20)
for i in range(3):
for j in range(3):
if self.board[i*3+j]==-1:
print("",end="X ")
elif self.board[i*3+j]==1:
print("",end="O ")
else:
print("",end="_ ")
print()
print("-"*20)
# 游戏未结束返回0,AI胜利返回-1,人类获胜返回1,平局返回2
def get_winner(self):
if any([var==3 or var ==-3 for var in [sum(self.board[i:i+3]) for i in range(0,9,3)]]):
return -self.player
if any([var==3 or var ==-3 for var in [sum(self.board[i:7+i:3]) for i in range(0,3)]]):
return -self.player
if any([var==3 or var ==-3 for var in [sum(self.board[:9:4]),sum(self.board[2:7:2])]]):
return -self.player
return 0 if self.board.count(0)>0 else 2
def evaluate_minmax(self):
winner = self.get_winner()
if winner == 2:
return 0
return 1000000*winner
def evaluate_nega_max(self):
winner = self.get_winner()
if winner == 2:
return 0
return 1000000*winner*self.player
# 极大极小算法
def min_max(self,depth):
# 搜索深度耗尽或者某一方获胜
winner = self.get_winner()
if winner!=0 or depth==0:
return self.evaluate_minmax()
# 继续搜索
# 走步生成
moves = [x for x in range(9) if self.board[x]==0]
# 初始化最优值
bestvalue = -1000000*self.player
for pos in moves:
self.go(pos)
value = self.min_max(depth-1)
self.cancel_go(pos)
if self.player==-1:
if value<=bestvalue:
bestvalue = value
if depth==self.depth:
self.bestmove = pos
else:
if value>=bestvalue:
bestvalue = value
if depth==self.depth:
self.bestmove = pos
return bestvalue
# 负极大算法
def nega_max(self,depth):
# 搜索深度耗尽或者某一方获胜
winner = self.get_winner()
if winner!=0 or depth==0:
return self.evaluate_nega_max()
# 继续搜索
# 走步生成
moves = [x for x in range(9) if self.board[x]==0]
# 初始化最优值
bestvalue = -1000000
for pos in moves:
self.go(pos)
value = -self.nega_max(depth-1)
self.cancel_go(pos)
if value>=bestvalue:
bestvalue = value
if depth == self.depth:
self.bestmove = pos
return bestvalue
# alpha_beta算法
def alpha_beta(self,depth,alpha,beta):
# 搜索深度耗尽或者某一方获胜
winner = self.get_winner()
if winner!=0 or depth==0:
return self.evaluate_nega_max()
# 继续搜索
# 走步生成
moves = [x for x in range(9) if self.board[x]==0]
for pos in moves:
self.go(pos)
value = -self.alpha_beta(depth-1,-beta,-alpha)
self.cancel_go(pos)
if value>=beta:
if self.depth==depth:
self.bestmove = pos
return beta
if value>alpha:
if self.depth==depth:
self.bestmove = pos
alpha = value
return alpha
# 8是深度,因为估值函数的问题,这里深度要大于等于8才能算正常落子
game = TicTacToe(8)
while game.get_winner()==0:
# game.min_max(game.depth)
# game.nega_max(game.depth)
game.alpha_beta(game.depth,-1000000,1000000)
game.go(game.bestmove)
game.print_board()
print("result:",game.get_winner())