Project 6: Reinforcement Gaming Agent
Agent Environment Setup
Deep Q-Networks (DQNs) are a class of Reinforcement Learning (RL) algorithms that require an environment where an agent can interact, take actions, and receive rewards to learn optimal behavior. Instead of using a prebuilt Gym environment, we built our own custom Gym environment for our game, so the RL agent can learn in a controlled, tailored setting that we can adjust as needed.
- It provides a standardized way for RL agents to interact with a game or simulation.
- It defines state observations, actions, rewards, and termination conditions, making training consistent.
- It allows the agent to receive visual inputs (frames) or structured state representations, which are processed by the Deep Q-Network (DQN) to predict the best actions.
The environment also defines the agent's reward function, summarized in the table below.
| Event | Condition | Reward | Description |
|---|---|---|---|
| Base Survival Reward | Every step | +1 + (time_elapsed/1000) | Provides a continuously increasing reward based on the elapsed time, incentivizing the player to survive longer. |
| Wall Collision Penalty | When the player touches the left/right or top/bottom boundaries | -min(25, 5 + (screen_dimension / max(1, player coordinate))) | Penalizes the player for hitting the edges of the screen. The penalty increases as the player gets closer to the boundary, capped at 25. |
| Moving Away from Enemy Bonus | When the player's movement vector is directed away from an enemy (dot product < 0) | +5 per enemy | Rewards evasive maneuvers by giving a bonus when the player moves in the opposite direction from an approaching enemy. |
| Proximity Penalty | For each enemy within 50 pixels of the player | -(50 - distance) × 2 | Penalizes the player for being too close to an enemy, with a larger penalty the closer the enemy is (within a 50-pixel range). |
| Collision Penalty | When the player collides with any enemy | -300 | A severe penalty that results in an immediate game over, discouraging direct collisions with enemies. |
import pygame
import numpy as np
import cv2
import gym
from gym import spaces
from collections import deque
from main import Player, Enemy, spawn_enemy
class SpaceshipDodgerEnv(gym.Env):
    """Custom Gym environment for the spaceship-dodging game.

    Observations are stacks of the 4 most recent grayscale frames
    (shape ``(4, 96, 96)``, ``uint8``); the action space is
    ``Discrete(4)`` = UP / DOWN / LEFT / RIGHT. The reward combines a
    time-based survival bonus, wall and proximity penalties, an
    evasive-movement bonus, and a large terminal collision penalty.
    """

    def __init__(self):
        super(SpaceshipDodgerEnv, self).__init__()
        self.screen_width = 900
        self.screen_height = 900
        self.player_size = 20
        self.enemy_radius = 10
        pygame.init()
        self.screen = pygame.display.set_mode((self.screen_width, self.screen_height))
        self.clock = pygame.time.Clock()
        self.font = pygame.font.SysFont(None, 36)
        # 4 discrete movement actions: 0=UP, 1=DOWN, 2=LEFT, 3=RIGHT
        self.action_space = spaces.Discrete(4)
        # Channel-first stack of 4 grayscale 96x96 frames
        self.observation_space = spaces.Box(low=0, high=255, shape=(4, 96, 96), dtype=np.uint8)
        self.frame_stack = deque(maxlen=4)
        self.last_spawn_time = pygame.time.get_ticks()
        self.base_spawn_interval = 2000
        self.reset()

    def reset(self):
        """Start a new episode and return the initial stacked observation."""
        self.player = Player()
        self.enemies = pygame.sprite.Group()
        self.start_time = pygame.time.get_ticks()
        self.running = True
        self.spawn_timer = 0
        # Spawn an initial enemy relative to the player's starting position.
        player_position = self.player.rect.center
        self.enemies.add(spawn_enemy(player_position))
        # Fill the stack with copies of the first processed frame so the
        # observation has a valid (4, 96, 96) shape from the very first step.
        frame = self._get_processed_frame()
        self.frame_stack = deque([frame] * 4, maxlen=4)
        return np.concatenate(list(self.frame_stack), axis=0)

    def step(self, action):
        """Advance the environment by one tick.

        Parameters
        ----------
        action : int
            0=UP, 1=DOWN, 2=LEFT, 3=RIGHT.

        Returns
        -------
        tuple
            ``(observation, reward, done, info)`` in the classic Gym
            4-tuple format.
        """
        prev_x, prev_y = self.player.rect.x, self.player.rect.y
        # Movement based on action
        if action == 0:    # UP
            self.player.rect.y -= self.player.speed
        elif action == 1:  # DOWN
            self.player.rect.y += self.player.speed
        elif action == 2:  # LEFT
            self.player.rect.x -= self.player.speed
        elif action == 3:  # RIGHT
            self.player.rect.x += self.player.speed
        time_elapsed = pygame.time.get_ticks() - self.start_time
        # Survival reward: +1 base, growing with elapsed seconds.
        reward = 1 + (time_elapsed / 1000) * 1.0
        done = False
        # Wall penalty. NOTE: use <=/>= rather than == — at this point the
        # player has moved but has NOT been clamped yet, so a move into the
        # wall leaves the rect strictly outside the bound (e.g. left == -speed)
        # and an equality test would almost never fire.
        if self.player.rect.left <= 0 or self.player.rect.right >= self.screen_width:
            reward -= min(25, 5 + (self.screen_width / max(1, self.player.rect.x)))
        if self.player.rect.top <= 0 or self.player.rect.bottom >= self.screen_height:
            reward -= min(25, 5 + (self.screen_height / max(1, self.player.rect.y)))
        # Keep the player inside screen bounds.
        self.player.rect.clamp_ip(pygame.Rect(0, 0, self.screen_width, self.screen_height))
        # Enemy spawning on a randomized cadence.
        self.spawn_timer += 1
        if self.spawn_timer % np.random.randint(30, 50) == 0:
            self.enemies.add(spawn_enemy(self.player.rect.center))
        # Cap the enemy count; the cap grows by 1 every 5 seconds, up to 20.
        max_enemies = min(20, 10 + ((time_elapsed) // 5000))
        while len(self.enemies) > max_enemies:
            # Cull the oldest enemy first.
            self.enemies.sprites()[0].kill()
        self.enemies.update()
        # Shaping rewards relative to each enemy.
        for enemy in self.enemies:
            enemy_vec = np.array(enemy.rect.center) - np.array(self.player.rect.center)
            player_movement_vec = np.array([self.player.rect.x, self.player.rect.y]) - np.array([prev_x, prev_y])
            # Negative dot product: the player moved away from this enemy.
            if np.dot(enemy_vec, player_movement_vec) < 0:
                reward += 5  # bonus for evasive movement
            distance = np.linalg.norm(np.array(self.player.rect.center) - np.array(enemy.rect.center))
            if distance < 50:
                reward -= (50 - distance) * 2  # proximity penalty, harsher when closer
        # Collision with any enemy ends the episode with a large penalty.
        if pygame.sprite.spritecollideany(self.player, self.enemies):
            reward -= 300
            done = True
        # Append the new frame and emit the stacked observation.
        new_frame = self._get_processed_frame()
        self.frame_stack.append(new_frame)
        stacked_frames = np.concatenate(list(self.frame_stack), axis=0)
        return stacked_frames, reward, done, {}

    def _get_processed_frame(self):
        """Render the current scene and return a (1, 96, 96) uint8 frame."""
        self.screen.fill((0, 0, 0))
        self.screen.blit(self.player.image, self.player.rect)
        self.enemies.draw(self.screen)
        frame = pygame.surfarray.array3d(pygame.display.get_surface())
        # surfarray yields (width, height, 3); transpose to (height, width, 3).
        frame = np.transpose(frame, (1, 0, 2))
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
        frame = cv2.resize(frame, (96, 96))
        return np.expand_dims(frame, axis=0)

    def render(self, mode="human"):
        """Flip the display in 'human' mode; otherwise minimize the window."""
        if mode == "human":
            pygame.display.flip()
        else:
            pygame.display.iconify()

    def close(self):
        """Shut down pygame and release the display."""
        pygame.quit()