2048 - AI cannot dial more than 256

Question

2048 - AI cannot dial more than 256

I am trying to implement AI for 2048 with MiniMax and Alpha-Beta cropping based on snake strategy (see this document) which seems best as a single heuristic.

Unfortunately, the AI does 256 in most games, which is not much better than the empty cell heuristic. I've already read the related threads here but can't seem to find a solution myself.

Here is the code:

import math
from BaseAI_3 import BaseAI

INF_P = math.inf

class PlayerAI(BaseAI):
    move_str = {
        0: "UP",
        1: "DOWN",
        2: "LEFT",
        3: "RIGHT"
    }

    def __init__(self):
        super().__init__()
        self.depth_max = 4

    def getMove(self, grid):
        move_direction, state, utility = self.decision(grid)
        act_move = moves.index(move_direction)
        return moves[act_move] if moves else None

    def get_children(self, grid):
        grid.children = []
        for move_direction in grid.getAvailableMoves():
            gridCopy = grid.clone()
            gridCopy.path = grid.path[:]
            gridCopy.path.append(PlayerAI.move_str[move_direction])
            gridCopy.move(move_direction)
            gridCopy.depth_current = grid.depth_current + 1
            grid.children.append((move_direction, gridCopy))
        return grid.children

    def utility(self, state):

        def snake():
            poses = [
                [
                    [2 ** 15, 2 ** 14, 2 ** 13, 2 ** 12],
                    [2 ** 8, 2 ** 9, 2 ** 10, 2 ** 11],
                    [2 ** 7, 2 ** 6, 2 ** 5, 2 ** 4],
                    [2 ** 0, 2 ** 1, 2 ** 2, 2 ** 3]
                ]
                ,
                [
                   [2 ** 15, 2 ** 8, 2 ** 7, 2 ** 0],
                   [2 ** 14, 2 ** 9, 2 ** 6, 2 ** 1],
                   [2 ** 13, 2 ** 10, 2 ** 5, 2 ** 2],
                   [2 ** 12, 2 ** 11, 2 ** 4, 2 ** 3]
                ]
            ]

            poses.append([item for item in reversed(poses[0])])
            poses.append([list(reversed(item)) for item in reversed(poses[0])])
            poses.append([list(reversed(item)) for item in poses[0]])

            poses.append([item for item in reversed(poses[1])])
            poses.append([list(reversed(item)) for item in reversed(poses[1])])
            poses.append([list(reversed(item)) for item in poses[1]])

            max_value = -INF_P
            for pos in poses:
                value = 0
                for i in range(state.size):
                    for j in range(state.size):
                        value += state.map[i][j] * pos[i][j]

                if value > max_value:
                    max_value = value

            return max_value

        weight_snake = 1 / (2 ** 13)

        value = (
            weight_snake * snake(),
        )

        return value

    def decision(self, state):
        state.depth_current = 1
        state.path = []
        return self.maximize(state, -INF_P, INF_P)

    def terminal_state(self, state):
        return state.depth_current >= self.depth_max

    def maximize(self, state, alpha, beta):
        # terminal-state check
        if self.terminal_state(state):
            return (None, state, self.utility(state))

        max_move_direction, max_child, max_utility = None, None, (-INF_P, )
        for move_direction, child in self.get_children(state):
            _, state2, utility = self.minimize(child, alpha, beta)
            child.utility = utility

            if sum(utility) > sum(max_utility):
                max_move_direction, max_child, max_utility = move_direction, child, utility

            if sum(max_utility) >= beta:
                break

            if sum(max_utility) > alpha:
                alpha = sum(max_utility)

        state.utility = max_utility
        state.alpha = alpha
        state.beta = beta

        return max_move_direction, max_child, max_utility

    def minimize(self, state, alpha, beta):
        # terminal-state check
        if self.terminal_state(state):
            return (None, state, self.utility(state))

        min_move_direction, min_child, min_utility = None, None, (INF_P, )
        for move_direction, child in self.get_children(state):
            _, state2, utility = self.maximize(child, alpha, beta)
            child.utility = utility

            if sum(utility) < sum(min_utility):
                min_move_direction, min_child, min_utility = move_direction, child, utility

            if sum(min_utility) <= alpha:
                break

            if sum(min_utility) < beta:
                beta = sum(min_utility)

        state.utility = min_utility
        state.alpha = alpha
        state.beta = beta

        return min_move_direction, min_child, min_utility

grid

is an object, grid.map

is a two-dimensional array (list of lists).

Do I have errors? How can I paste the code?

Added game log: https://pastebin.com/eyzgU2dN

+3

python artificial-intelligence 2048 alpha-beta-pruning minimax

Keeper June 30. 17 at 12:48

source to share

1 answer

Keeper · Accepted Answer · 2017-07-03T08:41:55+0000

Over the weekend, I realized that the algorithm was not properly implemented. The error in the function minimize()

where I was looking for children is wrong - it should be like this:

def get_opponent_children(self, grid):
    grid.children = []
    for x in range(grid.size):
        for y in range(grid.size):
            if grid.map[x][y] == 0:
                for c in (2, 4):
                    gridCopy = grid.clone()
                    gridCopy.path = grid.path[:]
                    gridCopy.deep_current = grid.deep_current + 1
                    gridCopy.map[x][y] = c
                    grid.children.append((None, gridCopy))

    return grid.children

and the corresponding change:

for move_direction, child in self.get_opponent_children(state):

It is now okay to hit 1024 and 2048 more times.

2048 - AI cannot dial more than 256

More articles: