Project 6: Reinforcement Gaming Agent

Agent Environment Setup

Deep Q-Networks (DQN) are a type of Reinforcement Learning (RL) algorithm that requires an environment where an agent can interact, take actions, and receive rewards in order to learn optimal behavior. Instead of using a prebuilt Gym environment, we built our own custom Gym environment for our game, so our RL agent can learn in a controlled and tailored setting that we can adjust as needed.

  • It provides a standardized way for RL agents to interact with a game or simulation.
  • It defines state observations, actions, rewards, and termination conditions, making training consistent.
  • It allows the agent to receive visual inputs (frames) or structured state representations, which are processed by the Deep Q-Network (DQN) to predict the best actions.

The Environment includes the reward function for the agent.

| Event | Condition | Reward | Description |
|---|---|---|---|
| Base Survival Reward | Every step | +1 + (time_elapsed/1000) | Provides a continuously increasing reward based on the elapsed time, incentivizing the player to survive longer. |
| Wall Collision Penalty | When the player touches the left/right or top/bottom boundaries | -min(25, 5 + (screen_dimension / max(1, player coordinate))) | Penalizes the player for hitting the edges of the screen. The penalty increases as the player gets closer to the boundary, capped at 25. |
| Moving Away from Enemy Bonus | When the player's movement vector is directed away from an enemy (dot product < 0) | +5 per enemy | Rewards evasive maneuvers by giving a bonus when the player moves in the opposite direction from an approaching enemy. |
| Proximity Penalty | For each enemy within 50 pixels of the player | -(50 - distance) × 2 | Penalizes the player for being too close to an enemy, with a larger penalty the closer the enemy is (within a 50-pixel range). |
| Collision Penalty | When the player collides with any enemy | -300 | A severe penalty that results in an immediate game over, discouraging direct collisions with enemies. |

import pygame
import numpy as np
import cv2
import gym
from gym import spaces
from collections import deque
from main import Player, Enemy, spawn_enemy

class SpaceshipDodgerEnv(gym.Env):
	"""Custom Gym environment for the spaceship-dodging game.

	Observations are stacks of 4 grayscale 96x96 frames, shape (4, 96, 96),
	dtype uint8. Actions are Discrete(4): 0=UP, 1=DOWN, 2=LEFT, 3=RIGHT.
	Uses the classic (pre-0.26) Gym API: step() returns a 4-tuple and
	reset() returns only the observation.
	"""

	def __init__(self):
		super(SpaceshipDodgerEnv, self).__init__()

		# Game geometry (pixels).
		self.screen_width = 900
		self.screen_height = 900
		self.player_size = 20
		self.enemy_radius = 10

		pygame.init()
		self.screen = pygame.display.set_mode((self.screen_width, self.screen_height))
		self.clock = pygame.time.Clock()
		self.font = pygame.font.SysFont(None, 36)

		# 4 discrete movement actions; observation is a 4-frame grayscale stack.
		self.action_space = spaces.Discrete(4)
		self.observation_space = spaces.Box(low=0, high=255, shape=(4, 96, 96), dtype=np.uint8)
		self.frame_stack = deque(maxlen=4)
		# NOTE(review): last_spawn_time / base_spawn_interval are currently
		# unused -- spawning is driven by spawn_timer in step(). Kept for
		# backward compatibility in case external code reads them.
		self.last_spawn_time = pygame.time.get_ticks()
		self.base_spawn_interval = 2000
		self.reset()

	def reset(self):
		"""Start a new episode and return the initial stacked observation.

		Returns:
			np.ndarray of shape (4, 96, 96), dtype uint8.
		"""
		self.player = Player()
		self.enemies = pygame.sprite.Group()
		self.start_time = pygame.time.get_ticks()
		self.running = True
		self.spawn_timer = 0

		# Spawn one initial enemy relative to the player's position.
		player_position = self.player.rect.center
		self.enemies.add(spawn_enemy(player_position))

		# Fill the stack with copies of the first frame so the observation
		# has a valid (4, 96, 96) shape from the very first step.
		frame = self._get_processed_frame()
		self.frame_stack = deque([frame] * 4, maxlen=4)

		return np.concatenate(list(self.frame_stack), axis=0)

	def step(self, action):
		"""Advance the game by one step.

		Args:
			action: int in {0: UP, 1: DOWN, 2: LEFT, 3: RIGHT}.

		Returns:
			(observation, reward, done, info) — the classic Gym 4-tuple.
		"""
		prev_x, prev_y = self.player.rect.x, self.player.rect.y

		# Apply the chosen movement.
		if action == 0:  # UP
			self.player.rect.y -= self.player.speed
		elif action == 1:  # DOWN
			self.player.rect.y += self.player.speed
		elif action == 2:  # LEFT
			self.player.rect.x -= self.player.speed
		elif action == 3:  # RIGHT
			self.player.rect.x += self.player.speed

		time_elapsed = pygame.time.get_ticks() - self.start_time

		# Base survival reward: grows with elapsed time to encourage longevity.
		reward = 1 + (time_elapsed / 1000)
		done = False

		# Wall penalty. BUGFIX: the original compared against the boundary
		# with ==, which only fires when the player's speed lands it exactly
		# on the edge; after overshooting (e.g. rect.left == -speed, before
		# the clamp below) the penalty was silently skipped. Use <= / >= so
		# any boundary contact is penalized, as the reward spec intends.
		if self.player.rect.left <= 0 or self.player.rect.right >= self.screen_width:
			reward -= min(25, 5 + (self.screen_width / max(1, self.player.rect.x)))
		if self.player.rect.top <= 0 or self.player.rect.bottom >= self.screen_height:
			reward -= min(25, 5 + (self.screen_height / max(1, self.player.rect.y)))

		# Keep the player inside the screen bounds.
		self.player.rect.clamp_ip(pygame.Rect(0, 0, self.screen_width, self.screen_height))

		# Stochastic enemy spawning: a fresh random modulus each step yields
		# roughly one spawn every 30-50 steps.
		self.spawn_timer += 1
		if self.spawn_timer % np.random.randint(30, 50) == 0:
			self.enemies.add(spawn_enemy(self.player.rect.center))

		# Cap the enemy count; the cap grows by 1 every 5 seconds, up to 20.
		max_enemies = min(20, 10 + (time_elapsed // 5000))
		while len(self.enemies) > max_enemies:
			self.enemies.sprites()[0].kill()

		self.enemies.update()

		# Shaping rewards relative to each enemy. The player's movement
		# vector is loop-invariant, so compute it once before the loop.
		player_movement_vec = np.array([self.player.rect.x, self.player.rect.y]) - np.array([prev_x, prev_y])
		for enemy in self.enemies:
			enemy_vec = np.array(enemy.rect.center) - np.array(self.player.rect.center)
			if np.dot(enemy_vec, player_movement_vec) < 0:
				reward += 5  # bonus for moving away from this enemy
			distance = np.linalg.norm(np.array(self.player.rect.center) - np.array(enemy.rect.center))
			if distance < 50:
				reward -= (50 - distance) * 2  # proximity penalty, larger when closer

		# Colliding with any enemy ends the episode with a severe penalty.
		if pygame.sprite.spritecollideany(self.player, self.enemies):
			reward -= 300
			done = True

		# Push the new frame and return the stacked observation.
		new_frame = self._get_processed_frame()
		self.frame_stack.append(new_frame)
		stacked_frames = np.concatenate(list(self.frame_stack), axis=0)

		return stacked_frames, reward, done, {}

	def _get_processed_frame(self):
		"""Render the scene and return a (1, 96, 96) uint8 grayscale frame."""
		self.screen.fill((0, 0, 0))
		self.screen.blit(self.player.image, self.player.rect)
		self.enemies.draw(self.screen)

		# pygame's surfarray is (width, height, channels); transpose to
		# (height, width, channels) before the OpenCV conversions.
		frame = pygame.surfarray.array3d(pygame.display.get_surface())
		frame = np.transpose(frame, (1, 0, 2))
		frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
		frame = cv2.resize(frame, (96, 96))
		return np.expand_dims(frame, axis=0)

	def render(self, mode="human"):
		"""Flip the display in human mode; minimize the window otherwise."""
		if mode == "human":
			pygame.display.flip()
		else:
			pygame.display.iconify()

	def close(self):
		"""Shut down pygame and release the display."""
		pygame.quit()