Spaces:

broadfield-dev
/

RL

Sleeping

App Files Files Community

RL / app.py

broadfield-dev

Create app.py

29e5977 verified 6 months ago

raw

history blame

3.18 kB

	import numpy as np
	import gradio as gr

	# Define the grid world environment
	class GridWorld:
	def __init__(self, size=4):
	self.size = size
	self.agent_pos = [0, 0]
	self.goal_pos = [size-1, size-1]
	self.obstacles = [(1, 1), (2, 2)]

	def reset(self):
	self.agent_pos = [0, 0]
	return self.agent_pos

	def step(self, action):
	x, y = self.agent_pos

	if action == 0: # Up
	x = max(0, x - 1)
	elif action == 1: # Down
	x = min(self.size - 1, x + 1)
	elif action == 2: # Left
	y = max(0, y - 1)
	elif action == 3: # Right
	y = min(self.size - 1, y + 1)

	self.agent_pos = [x, y]

	if tuple(self.agent_pos) in self.obstacles:
	return self.agent_pos, -10, False, {}
	elif self.agent_pos == self.goal_pos:
	return self.agent_pos, 10, True, {}
	else:
	return self.agent_pos, -1, False, {}

	# Define the RL agent
	class QLearningAgent:
	def __init__(self, env, alpha=0.1, gamma=0.9, epsilon=0.1):
	self.env = env
	self.alpha = alpha
	self.gamma = gamma
	self.epsilon = epsilon
	self.q_table = np.zeros((env.size, env.size, 4))

	def choose_action(self, state):
	if np.random.uniform(0, 1) < self.epsilon:
	return np.random.choice(4)
	else:
	return np.argmax(self.q_table[state[0], state[1]])

	def learn(self, state, action, reward, next_state):
	best_next_action = np.argmax(self.q_table[next_state[0], next_state[1]])
	td_target = reward + self.gamma * self.q_table[next_state[0], next_state[1], best_next_action]
	td_error = td_target - self.q_table[state[0], state[1], action]
	self.q_table[state[0], state[1], action] += self.alpha * td_error

	# Initialize the environment and agent
	env = GridWorld()
	agent = QLearningAgent(env)


	def visualize_grid(agent_pos, goal_pos, obstacles):
	grid = np.zeros((env.size, env.size), dtype=str)
	grid[agent_pos[0], agent_pos[1]] = 'A'
	grid[goal_pos[0], goal_pos[1]] = 'G'
	for obstacle in obstacles:
	grid[obstacle[0], obstacle[1]] = 'X'
	return '\n'.join([' '.join(row) for row in grid])

	def train_agent(steps=100):
	state = env.reset()
	for _ in range(steps):
	action = agent.choose_action(state)
	next_state, reward, done, _ = env.step(action)
	agent.learn(state, action, reward, next_state)
	state = next_state
	if done:
	break
	return visualize_grid(env.agent_pos, env.goal_pos, env.obstacles)

	# Create the Gradio interface
	input_steps = gr.Slider(1, 1000, value=100, label="Number of Training Steps")
	output_grid = gr.Textbox(label="Grid World")

	# Define the Gradio interface function
	def update_grid(steps):
	return train_agent(steps)

	# Create the Gradio interface
	iface = gr.Interface(
	fn=update_grid,
	inputs=[input_steps],
	outputs=[output_grid],
	title="Reinforcement Learning with Grid World",
	description="Train a Q-learning agent to navigate a grid world and visualize the results."
	)

	# Launch the interface
	iface.launch()