2048 - AI cannot dial more than 256
I am trying to implement AI for 2048 with MiniMax and Alpha-Beta cropping based on snake strategy (see this document) which seems best as a single heuristic.
Unfortunately, the AI ββdoes 256 in most games, which is not much better than the empty cell heuristic. I've already read the related threads here but can't seem to find a solution myself.
Here is the code:
import math
from BaseAI_3 import BaseAI
INF_P = math.inf
class PlayerAI(BaseAI):
move_str = {
0: "UP",
1: "DOWN",
2: "LEFT",
3: "RIGHT"
}
def __init__(self):
super().__init__()
self.depth_max = 4
def getMove(self, grid):
move_direction, state, utility = self.decision(grid)
act_move = moves.index(move_direction)
return moves[act_move] if moves else None
def get_children(self, grid):
grid.children = []
for move_direction in grid.getAvailableMoves():
gridCopy = grid.clone()
gridCopy.path = grid.path[:]
gridCopy.path.append(PlayerAI.move_str[move_direction])
gridCopy.move(move_direction)
gridCopy.depth_current = grid.depth_current + 1
grid.children.append((move_direction, gridCopy))
return grid.children
def utility(self, state):
def snake():
poses = [
[
[2 ** 15, 2 ** 14, 2 ** 13, 2 ** 12],
[2 ** 8, 2 ** 9, 2 ** 10, 2 ** 11],
[2 ** 7, 2 ** 6, 2 ** 5, 2 ** 4],
[2 ** 0, 2 ** 1, 2 ** 2, 2 ** 3]
]
,
[
[2 ** 15, 2 ** 8, 2 ** 7, 2 ** 0],
[2 ** 14, 2 ** 9, 2 ** 6, 2 ** 1],
[2 ** 13, 2 ** 10, 2 ** 5, 2 ** 2],
[2 ** 12, 2 ** 11, 2 ** 4, 2 ** 3]
]
]
poses.append([item for item in reversed(poses[0])])
poses.append([list(reversed(item)) for item in reversed(poses[0])])
poses.append([list(reversed(item)) for item in poses[0]])
poses.append([item for item in reversed(poses[1])])
poses.append([list(reversed(item)) for item in reversed(poses[1])])
poses.append([list(reversed(item)) for item in poses[1]])
max_value = -INF_P
for pos in poses:
value = 0
for i in range(state.size):
for j in range(state.size):
value += state.map[i][j] * pos[i][j]
if value > max_value:
max_value = value
return max_value
weight_snake = 1 / (2 ** 13)
value = (
weight_snake * snake(),
)
return value
def decision(self, state):
state.depth_current = 1
state.path = []
return self.maximize(state, -INF_P, INF_P)
def terminal_state(self, state):
return state.depth_current >= self.depth_max
def maximize(self, state, alpha, beta):
# terminal-state check
if self.terminal_state(state):
return (None, state, self.utility(state))
max_move_direction, max_child, max_utility = None, None, (-INF_P, )
for move_direction, child in self.get_children(state):
_, state2, utility = self.minimize(child, alpha, beta)
child.utility = utility
if sum(utility) > sum(max_utility):
max_move_direction, max_child, max_utility = move_direction, child, utility
if sum(max_utility) >= beta:
break
if sum(max_utility) > alpha:
alpha = sum(max_utility)
state.utility = max_utility
state.alpha = alpha
state.beta = beta
return max_move_direction, max_child, max_utility
def minimize(self, state, alpha, beta):
# terminal-state check
if self.terminal_state(state):
return (None, state, self.utility(state))
min_move_direction, min_child, min_utility = None, None, (INF_P, )
for move_direction, child in self.get_children(state):
_, state2, utility = self.maximize(child, alpha, beta)
child.utility = utility
if sum(utility) < sum(min_utility):
min_move_direction, min_child, min_utility = move_direction, child, utility
if sum(min_utility) <= alpha:
break
if sum(min_utility) < beta:
beta = sum(min_utility)
state.utility = min_utility
state.alpha = alpha
state.beta = beta
return min_move_direction, min_child, min_utility
grid
is an object, grid.map
is a two-dimensional array (list of lists).
Do I have errors? How can I paste the code?
Added game log: https://pastebin.com/eyzgU2dN
source to share
Over the weekend, I realized that the algorithm was not properly implemented. The error in the function minimize()
where I was looking for children is wrong - it should be like this:
def get_opponent_children(self, grid):
grid.children = []
for x in range(grid.size):
for y in range(grid.size):
if grid.map[x][y] == 0:
for c in (2, 4):
gridCopy = grid.clone()
gridCopy.path = grid.path[:]
gridCopy.deep_current = grid.deep_current + 1
gridCopy.map[x][y] = c
grid.children.append((None, gridCopy))
return grid.children
and the corresponding change:
for move_direction, child in self.get_opponent_children(state):
It is now okay to hit 1024 and 2048 more times.
source to share