Project 5: Q-Table Reinforcement Learning Maze Solver

Testing the Maze Solver with the Trained Model

This code uses the created model to evaluate how well the agent performs in maze-solving problems. Note that most of the videos in part 2 were snippets from running this tool, which:

  • Loads a maze from a JSON file and visualizes it (built with the maze builder but not part of the learning data for the Q-table).
  • Loads a pre-trained Q-table model to simulate maze solving (built with the model builder from step 2).
  • Creates a simulated environment to test the model's ability to navigate the maze from start to goal.
  • Visualizes the solving process step-by-step with real-time updates using Matplotlib.
  • Allows user input for step delay to customize the visualization speed.
  • Logs detailed information for each step, including state, action, rewards, and Q-values.



import os
import json
import pickle
import random
import numpy as np
import matplotlib.pyplot as plt
from collections import deque

#Config
#Reward shaping and simulation settings shared by MazeEnv and the solver
#loop. The "was" notes record the values used for the original training run.

CONFIG = {
	#reward structure
	"step_penalty": -0.1, #flat cost applied to every move (was -0.1)
	"goal_reward": 1000, #terminal reward for reaching the goal cell (was 1000)
	"reward_closer": 2, #Manhattan distance to goal decreased (was 2)
	"reward_farther": -1, #Manhattan distance to goal increased (was -1)
	"wall_penalty": -1, #extra penalty when a move is blocked by wall/border (was -1)
	"exploration_bonus": 1, #first visit to a cell this episode (was 1)
	"revisit_penalty": -1, #stepping onto an already-visited cell (was -1)

	#simulation parameters
	"max_steps": 1000, #abort the rollout after this many steps
	"max_stuck_steps": 5, #repeats inside the history window that count as "stuck"
	"step_delay": 0.025, #seconds between rendered frames (user-adjustable at startup)
}

#maze environment
class MazeEnv:
	"""Grid-world maze environment used to evaluate a trained Q-table.

	The maze is a 2-D array where 0 marks an open cell and any other
	value marks a wall. Rewards are shaped via the module-level CONFIG:
	a per-step cost, distance-based shaping toward the goal, a wall-bump
	penalty, and an exploration bonus / revisit penalty.
	"""

	def __init__(self, maze, start, goal):
		self.maze = np.array(maze)
		self.start = tuple(start)
		self.goal = tuple(goal)
		# Action indices map, in order, to: right, down, left, up.
		self.actions = [(0, 1), (1, 0), (0, -1), (-1, 0)]
		self.reset()

	def reset(self):
		"""Place the agent on the start cell, clear visit history, return the state."""
		self.position = self.start
		self.visited_states = {self.start}
		return self.position

	def step(self, action):
		"""Apply one action index and return (new_position, reward, done)."""
		row, col = self.position
		d_row, d_col = self.actions[action]
		target = (row + d_row, col + d_col)

		rows, cols = self.maze.shape
		inside = 0 <= target[0] < rows and 0 <= target[1] < cols
		if inside and self.maze[target] == 0:
			new_position = target
		else:
			new_position = self.position  # blocked move: stay in place

		# Reaching the goal ends the episode; the goal reward replaces
		# all other shaping terms.
		if new_position == self.goal:
			self.position = new_position
			return new_position, CONFIG["goal_reward"], True

		reward = CONFIG["step_penalty"]

		# Distance shaping: compare Manhattan distances to the goal
		# before and after the move.
		old_dist = abs(row - self.goal[0]) + abs(col - self.goal[1])
		new_dist = (abs(new_position[0] - self.goal[0])
					+ abs(new_position[1] - self.goal[1]))
		if new_dist < old_dist:
			reward += CONFIG["reward_closer"]
		elif new_dist > old_dist:
			reward += CONFIG["reward_farther"]

		# A blocked move leaves the position unchanged and costs extra.
		if new_position == self.position:
			reward += CONFIG["wall_penalty"]

		if new_position in self.visited_states:
			reward += CONFIG["revisit_penalty"]
		else:
			self.visited_states.add(new_position)
			reward += CONFIG["exploration_bonus"]

		self.position = new_position
		return new_position, reward, False

#helper functions
def load_test_maze(folder, filename):
	"""Load and return the JSON maze description stored at folder/filename."""
	with open(os.path.join(folder, filename), 'r') as handle:
		return json.load(handle)

def load_q_table(model_folder, model_file):
	"""Unpickle and return a trained Q-table from model_folder/model_file.

	NOTE: pickle.load on an untrusted file can execute arbitrary code;
	only load models you created yourself.
	"""
	filepath = os.path.join(model_folder, model_file)
	with open(filepath, "rb") as handle:
		table = pickle.load(handle)
	print(f"Loaded Q-table from {filepath}")
	return table

def simulate_solver(env, q_table, config=CONFIG):
	"""Run one greedy rollout of a trained Q-table through ``env`` with live plotting.

	Redraws the maze after every step, logs each transition to stdout,
	and forces random moves whenever the agent appears stuck in a loop.
	Blocks at the end (plt.show) until the figure window is closed.

	Args:
		env: MazeEnv-style object exposing reset()/step(action) and
			maze/position/start/goal/actions attributes.
		q_table: dict mapping state tuples to a sequence of 4 Q-values
			(one per action); unseen states fall back to all zeros.
		config: reward/simulation settings dict; defaults to module CONFIG.

	Returns:
		None; results are reported via the plot and printed log.
	"""
	state = env.reset()
	done = False
	step_count = 0

	# Sliding window of recent states; a state repeated often within
	# this window is treated as the agent being stuck.
	state_history = deque(maxlen=10)  # Adjust the window size as needed.
	
	plt.ion()
	fig, ax = plt.subplots(figsize=(6, 6))

	print(f"Simulation start. Start={env.start}, Goal={env.goal}")

	while not done and step_count < config["max_steps"]:
		step_count += 1

		#visualize the maze: redraw the grid with agent (2) and goal (3) markers.
		ax.clear()
		maze_copy = env.maze.copy()
		maze_copy[env.position] = 2  # Mark agent's position.
		maze_copy[env.goal] = 3      # Mark goal.
		ax.imshow(maze_copy, cmap="binary", origin="upper")
		ax.text(env.goal[1], env.goal[0], "E", color="red", fontsize=14,
				ha="center", va="center")
		ax.text(env.position[1], env.position[0], "S", color="green", fontsize=14,
				ha="center", va="center")
		plt.draw()
		plt.pause(config["step_delay"])  # also sets the animation speed

		state_history.append(state)

		#check if the current state appears too frequently in the recent history.
		if state_history.count(state) > config["max_stuck_steps"]:
			print(f"Stuck detected at state {state}. Forcing escape with multiple random moves.")
			for _ in range(3):  # Force 3 consecutive random actions.
				action = np.random.randint(len(env.actions))
				next_state, reward, done = env.step(action)
				step_count += 1  # NOTE: escape moves can push step_count past max_steps
				print(f"  Forced random move: State={state}, Action={action}, Reward={reward}")
				state = next_state
				if done:
					break
			state_history.clear()  # Reset history after escape sequence.
			if done:
				break
			continue
		else:
			# Greedy policy: pick the highest-Q action; unseen states
			# default to zeros, so argmax then picks action 0 (right).
			q_values = q_table.get(state, [0, 0, 0, 0])
			action = np.argmax(q_values)

		next_state, reward, done = env.step(action)
		print(f"Step {step_count} | State={state}, Action={action}, Reward={reward}, Done={done}")

		state = next_state

	plt.ioff()
	plt.show()  # keep the final frame open until the user closes it

	if done:
		print(f"Maze solved in {step_count} steps.")
	else:
		print(f"Hit {step_count} steps without solving the maze.")

#Main function
if __name__ == "__main__":
	# Input locations: test maze JSON and the pre-trained Q-table pickle.
	test_maze_folder, test_maze_file = "testMaze", "maze_Test.json"
	model_folder, model_file = "model", "q_table.pkl"

	print("Loading test maze...")
	test_maze_data = load_test_maze(test_maze_folder, test_maze_file)

	print("Loading Q-table...")
	q_table = load_q_table(model_folder, model_file)

	print("Setting up the environment...")
	env = MazeEnv(test_maze_data["maze"], test_maze_data["start"], test_maze_data["goal"])

	# Let the user override the frame delay; blank or invalid input
	# keeps the configured default.
	raw_delay = input("Enter step delay in seconds (default {}): ".format(CONFIG["step_delay"])).strip()
	if raw_delay:
		try:
			CONFIG["step_delay"] = float(raw_delay)
		except ValueError:
			pass

	print("Starting solver simulation...")
	simulate_solver(env, q_table, config=CONFIG)