import numpy as np
import gradio as gr


# Define the grid world environment
class GridWorld:
    def __init__(self, size=4):
        self.size = size
        self.agent_pos = [0, 0]
        self.goal_pos = [size - 1, size - 1]
        self.obstacles = [(1, 1), (2, 2)]

    def reset(self):
        self.agent_pos = [0, 0]
        return self.agent_pos

    def step(self, action):
        x, y = self.agent_pos
        if action == 0:    # Up
            x = max(0, x - 1)
        elif action == 1:  # Down
            x = min(self.size - 1, x + 1)
        elif action == 2:  # Left
            y = max(0, y - 1)
        elif action == 3:  # Right
            y = min(self.size - 1, y + 1)
        self.agent_pos = [x, y]
        if tuple(self.agent_pos) in self.obstacles:
            return self.agent_pos, -10, False, {}
        elif self.agent_pos == self.goal_pos:
            return self.agent_pos, 10, True, {}
        else:
            return self.agent_pos, -1, False, {}
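
# The environment is a 4x4 grid: the state is the agent's (row, col) position,
# the four actions are up/down/left/right, and the rewards are -10 for stepping
# on an obstacle, +10 for reaching the goal (which ends the episode), and -1 per
# step otherwise.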

# Define the RL agent
class QLearningAgent:
    def __init__(self, env, alpha=0.1, gamma=0.9, epsilon=0.1):
        self.env = env
        self.alpha = alpha      # learning rate
        self.gamma = gamma      # discount factor
        self.epsilon = epsilon  # exploration rate
        self.q_table = np.zeros((env.size, env.size, 4))

    def choose_action(self, state):
        # Epsilon-greedy: explore with probability epsilon, otherwise act greedily.
        if np.random.uniform(0, 1) < self.epsilon:
            return np.random.choice(4)
        else:
            return np.argmax(self.q_table[state[0], state[1]])

    def learn(self, state, action, reward, next_state):
        best_next_action = np.argmax(self.q_table[next_state[0], next_state[1]])
        td_target = reward + self.gamma * self.q_table[next_state[0], next_state[1], best_next_action]
        td_error = td_target - self.q_table[state[0], state[1], action]
        self.q_table[state[0], state[1], action] += self.alpha * td_error
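
# learn() is the standard tabular Q-learning update:
#     Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))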

# Initialize the environment and agent
env = GridWorld()
agent = QLearningAgent(env)


def visualize_grid(agent_pos, goal_pos, obstacles):
    # np.zeros(..., dtype=str) fills the grid with empty strings, which renders as
    # blank output, so use a visible '.' placeholder for empty cells instead.
    grid = np.full((env.size, env.size), '.', dtype=str)
    for obstacle in obstacles:
        grid[obstacle[0], obstacle[1]] = 'X'
    grid[goal_pos[0], goal_pos[1]] = 'G'
    # Draw the agent last so it stays visible if it overlaps the goal or an obstacle.
    grid[agent_pos[0], agent_pos[1]] = 'A'
    return '\n'.join([' '.join(row) for row in grid])


def train_agent(steps=100):
    state = env.reset()
    # Run a single training episode of at most `steps` steps. Gradio sliders may
    # pass the value as a float, so cast it to int before using it as a range.
    for _ in range(int(steps)):
        action = agent.choose_action(state)
        next_state, reward, done, _ = env.step(action)
        agent.learn(state, action, reward, next_state)
        state = next_state
        if done:
            break
    return visualize_grid(env.agent_pos, env.goal_pos, env.obstacles)
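
# A quick way to sanity-check training without the UI (not executed here):
#   print(train_agent(200))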

# Create the Gradio interface components
input_steps = gr.Slider(1, 1000, value=100, step=1, label="Number of Training Steps")
output_grid = gr.Textbox(label="Grid World")

# Define the Gradio interface function
def update_grid(steps):
    return train_agent(steps)

# Create the Gradio interface
iface = gr.Interface(
    fn=update_grid,
    inputs=[input_steps],
    outputs=[output_grid],
    title="Reinforcement Learning with Grid World",
    description="Train a Q-learning agent to navigate a grid world and visualize the results."
)

# Launch the interface
iface.launch()
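
# To try this locally (assuming the script is saved as app.py): install the
# dependencies with `pip install numpy gradio`, run `python app.py`, and open
# the local URL that Gradio prints.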