Project 5: Q-Table Reinforcement Learning Maze Solver
Testing the Maze Solver with the Trained Model
This code uses the trained model to evaluate how well the agent performs on maze-solving problems. Note that most of the video in part 2 was made up of snippets from running this tool, which:
- Loads a maze from a JSON file and visualizes it (built with the maze builder but not part of the learning data for the Q-table).
- Loads a pre-trained Q-table model to simulate maze solving (built with the model builder from step 2).
- Creates a simulated environment to test the model's ability to navigate the maze from start to goal.
- Visualizes the solving process step-by-step with real-time updates using Matplotlib.
- Allows user input for step delay to customize the visualization speed.
- Logs detailed information for each step, including state, action, rewards, and Q-values.
import os
import json
import pickle
import random
import numpy as np
import matplotlib.pyplot as plt
from collections import deque
# Configuration: reward shaping and simulation limits shared by the solver.
CONFIG = {
    # --- Reward structure ---
    "step_penalty": -0.1,     # applied on every step (was -0.1)
    "goal_reward": 1000,      # awarded on reaching the goal (was 1000)
    "reward_closer": 2,       # Manhattan distance to goal decreased (was 2)
    "reward_farther": -1,     # Manhattan distance to goal increased (was -1)
    "wall_penalty": -1,       # bumped into a wall or the boundary (was -1)
    "exploration_bonus": 1,   # first visit to a cell (was 1)
    "revisit_penalty": -1,    # returned to an already-seen cell (was -1)
    # --- Simulation parameters ---
    "max_steps": 1000,        # hard cap on solver steps
    "max_stuck_steps": 5,     # repeats in the history window before forcing escape
    "step_delay": 0.025,      # seconds between visualization frames
}
#maze environment
class MazeEnv:
def __init__(self, maze, start, goal):
self.maze = np.array(maze)
self.start = tuple(start)
self.goal = tuple(goal)
# Define actions in order: right, down, left, up
self.actions = [(0, 1), (1, 0), (0, -1), (-1, 0)]
self.reset()
def reset(self):
self.position = self.start
self.visited_states = {self.start}
return self.position
def step(self, action):
x, y = self.position
dx, dy = self.actions[action]
nx, ny = x + dx, y + dy
if (0 <= nx < self.maze.shape[0] and 0 <= ny < self.maze.shape[1]
and self.maze[nx, ny] == 0):
new_position = (nx, ny)
else:
new_position = self.position
done = False
reward = CONFIG["step_penalty"]
if new_position == self.goal:
reward = CONFIG["goal_reward"]
done = True
else:
old_dist = abs(x - self.goal[0]) + abs(y - self.goal[1])
new_dist = abs(new_position[0] - self.goal[0]) + abs(new_position[1] - self.goal[1])
if new_dist < old_dist:
reward += CONFIG["reward_closer"]
elif new_dist > old_dist:
reward += CONFIG["reward_farther"]
if new_position == self.position:
reward += CONFIG["wall_penalty"]
if new_position not in self.visited_states:
reward += CONFIG["exploration_bonus"]
self.visited_states.add(new_position)
else:
reward += CONFIG["revisit_penalty"]
self.position = new_position
return new_position, reward, done
#helper functions
def load_test_maze(folder, filename):
filepath = os.path.join(folder, filename)
with open(filepath, 'r') as f:
return json.load(f)
def load_q_table(model_folder, model_file):
    """Unpickle a trained Q-table from disk and report where it came from.

    NOTE: pickle.load on an untrusted file can execute arbitrary code; only
    load model files you created yourself.
    """
    filepath = os.path.join(model_folder, model_file)
    with open(filepath, "rb") as model_fh:
        table = pickle.load(model_fh)
    print(f"Loaded Q-table from {filepath}")
    return table
def simulate_solver(env, q_table, config=CONFIG):
    """Replay the trained Q-table through `env` greedily, animating each step.

    Picks argmax over the state's Q-values each step; if the same state keeps
    recurring in a sliding history window, the agent is kicked out with a
    short burst of random actions.

    Args:
        env: MazeEnv instance to solve.
        q_table: mapping state-tuple -> sequence of 4 action values
            (states missing from the table fall back to all-zero Q-values).
        config: settings dict; uses "max_steps", "max_stuck_steps",
            "step_delay".
    """
    state = env.reset()
    done = False
    step_count = 0
    state_history = deque(maxlen=10)  # Adjust the window size as needed.
    plt.ion()  # interactive mode so the figure updates live each step
    fig, ax = plt.subplots(figsize=(6, 6))
    print(f"Simulation start. Start={env.start}, Goal={env.goal}")
    while not done and step_count < config["max_steps"]:
        step_count += 1
        # Visualize the maze: redraw the grid with agent/goal markers overlaid.
        ax.clear()
        maze_copy = env.maze.copy()
        maze_copy[env.position] = 2  # Mark agent's position.
        maze_copy[env.goal] = 3  # Mark goal.
        ax.imshow(maze_copy, cmap="binary", origin="upper")
        ax.text(env.goal[1], env.goal[0], "E", color="red", fontsize=14,
                ha="center", va="center")
        ax.text(env.position[1], env.position[0], "S", color="green", fontsize=14,
                ha="center", va="center")
        plt.draw()
        plt.pause(config["step_delay"])
        state_history.append(state)
        # Check if the current state appears too frequently in the recent history.
        if state_history.count(state) > config["max_stuck_steps"]:
            print(f"Stuck detected at state {state}. Forcing escape with multiple random moves.")
            for _ in range(3):  # Force 3 consecutive random actions.
                action = np.random.randint(len(env.actions))
                next_state, reward, done = env.step(action)
                # NOTE(review): escape moves can push step_count past max_steps;
                # the while condition only catches it on the next iteration.
                step_count += 1
                print(f"  Forced random move: State={state}, Action={action}, Reward={reward}")
                state = next_state
                if done:
                    break
            state_history.clear()  # Reset history after escape sequence.
            if done:
                break
            continue
        else:
            # Greedy policy: take the highest-valued action for this state.
            q_values = q_table.get(state, [0, 0, 0, 0])
            action = np.argmax(q_values)
            next_state, reward, done = env.step(action)
            print(f"Step {step_count} | State={state}, Action={action}, Reward={reward}, Done={done}")
            state = next_state
    plt.ioff()
    plt.show()  # blocks until the final figure window is closed
    if done:
        print(f"Maze solved in {step_count} steps.")
    else:
        print(f"Hit {step_count} steps without solving the maze.")
#Main function
def main():
    """Script entry point: load the test maze and Q-table, ask the user for a
    visualization step delay, then run the solver simulation.

    Side effects: reads testMaze/maze_Test.json and model/q_table.pkl from the
    working directory, prompts on stdin, mutates CONFIG["step_delay"], and
    opens a Matplotlib window.
    """
    test_maze_folder = "testMaze"
    test_maze_file = "maze_Test.json"
    model_folder = "model"
    model_file = "q_table.pkl"
    print("Loading test maze...")
    test_maze_data = load_test_maze(test_maze_folder, test_maze_file)
    print("Loading Q-table...")
    q_table = load_q_table(model_folder, model_file)
    print("Setting up the environment...")
    env = MazeEnv(test_maze_data["maze"], test_maze_data["start"], test_maze_data["goal"])
    # Let the user override the animation speed; keep the default on blank
    # or non-numeric input (original behavior: ValueError is swallowed).
    try:
        step_delay_input = input(f"Enter step delay in seconds (default {CONFIG['step_delay']}): ").strip()
        if step_delay_input:
            CONFIG["step_delay"] = float(step_delay_input)
    except ValueError:
        pass
    print("Starting solver simulation...")
    simulate_solver(env, q_table, config=CONFIG)


if __name__ == "__main__":
    main()