Spaces:

broadfield-dev
/

RL

Sleeping

App Files Files Community

broadfield-dev committed on Feb 16

Commit

fc3cc26

verified ·

1 Parent(s): 29e5977

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -5

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import numpy as np
 import gradio as gr
 # Define the grid world environment
 class GridWorld:
@@ -59,29 +60,48 @@ class QLearningAgent:
 # Shared module-level environment and learner; train_agent and
 # visualize_grid below read these globals directly.
 env = GridWorld()
 agent = QLearningAgent(env)
def visualize_grid(agent_pos, goal_pos, obstacles):
    """Render the grid world as plain text.

    Cells are single characters: 'A' for the agent, 'G' for the goal and
    'X' for obstacles; untouched cells stay as empty strings. Later writes
    win, so an obstacle hides the goal and the goal hides the agent when
    they share a cell.

    Args:
        agent_pos: (row, col) of the agent.
        goal_pos: (row, col) of the goal.
        obstacles: iterable of (row, col) obstacle cells.

    Returns:
        One line per grid row, cells separated by single spaces, rows
        separated by newlines.
    """
    cells = np.zeros((env.size, env.size), dtype=str)
    agent_row, agent_col = agent_pos[0], agent_pos[1]
    cells[agent_row, agent_col] = 'A'
    goal_row, goal_col = goal_pos[0], goal_pos[1]
    cells[goal_row, goal_col] = 'G'
    for blocked in obstacles:
        cells[blocked[0], blocked[1]] = 'X'
    rendered_rows = []
    for row in cells:
        rendered_rows.append(' '.join(row))
    return '\n'.join(rendered_rows)
 def train_agent(steps=100):
     """Train the shared agent for at most `steps` environment steps.

     The environment is reset first; the loop stops early as soon as the
     environment reports that the episode is done.

     Args:
         steps: upper bound on the number of environment steps to take.

     Returns:
         The text rendering produced by visualize_grid for the final
         environment state.
     """
     current = env.reset()
     for _ in range(steps):
         chosen = agent.choose_action(current)
         following, gain, finished, _ = env.step(chosen)
         agent.learn(current, chosen, gain, following)
         if finished:
             break
         current = following
     rendering = visualize_grid(env.agent_pos, env.goal_pos, env.obstacles)
     return rendering
 # Gradio widgets: a step-count slider as input, a text box for the rendered grid
 input_steps = gr.Slider(
     minimum=1,
     maximum=1000,
     value=100,
     label="Number of Training Steps",
 )
 output_grid = gr.Textbox(label="Grid World")
 # Define the Gradio interface function
 def update_grid(steps):

 import numpy as np
 import gradio as gr
+import matplotlib.pyplot as plt
 # Define the grid world environment
 class GridWorld:
 # Shared module-level environment and learner; train_agent and
 # visualize_grid below read these globals directly.
 env = GridWorld()
 agent = QLearningAgent(env)
def visualize_grid(agent_pos, goal_pos, obstacles, path=None):
    """Draw the grid world as a matplotlib figure.

    The grid side length is read from the module-level ``env.size``.

    Args:
        agent_pos: (row, col) of the agent.
        goal_pos: (row, col) of the goal.
        obstacles: iterable of (row, col) obstacle cells.
        path: optional sequence of (row, col) cells the agent visited.

    Returns:
        A matplotlib Figure with one coloured cell per grid square, labelled
        'A' (agent), 'G' (goal), 'X' (obstacle), 'P' (visited) or left blank.
    """
    # imshow only accepts numeric data, so every label gets an integer code
    # that drives the colour; the letters themselves are drawn as text on top.
    codes = {'': 0, 'P': 1, 'A': 2, 'G': 3, 'X': 4}

    grid = np.zeros((env.size, env.size), dtype=str)
    # The path goes in first: its last entry is the agent's own cell, so
    # painting it last would always hide the 'A' (and possibly 'G') marker.
    if path:
        for step in path:
            grid[step[0], step[1]] = 'P'
    grid[agent_pos[0], agent_pos[1]] = 'A'
    grid[goal_pos[0], goal_pos[1]] = 'G'
    for obstacle in obstacles:
        grid[obstacle[0], obstacle[1]] = 'X'

    image = np.zeros(grid.shape, dtype=int)
    for label, code in codes.items():
        image[grid == label] = code

    fig, ax = plt.subplots()
    # Fixed vmin/vmax keep each cell kind the same colour on every call,
    # regardless of which kinds happen to be present.
    ax.imshow(image, cmap='viridis', aspect='equal',
              vmin=0, vmax=len(codes) - 1)
    # Ticks sit on the cell borders so the white grid lines separate cells.
    ax.set_xticks(np.arange(-0.5, env.size, 1))
    ax.set_yticks(np.arange(-0.5, env.size, 1))
    ax.grid(color='w', linestyle='-', linewidth=2)
    ax.set_xticklabels([])
    ax.set_yticklabels([])
    for (i, j), label in np.ndenumerate(grid):
        if label:
            ax.text(j, i, str(label), ha='center', va='center', color='w')
    # Unregister the figure from pyplot so repeated calls in a long-running
    # app do not accumulate open figures; the Figure object stays usable.
    plt.close(fig)
    return fig
 def train_agent(steps=100):
     """Train the shared agent for at most `steps` steps and plot the result.

     Resets the shared environment, lets the shared agent act and learn until
     the environment reports `done` or the step budget is used up, and
     records every position the agent occupied along the way.

     Args:
         steps: maximum number of environment steps. Coerced with int()
             because a slider widget may deliver a float.

     Returns:
         The matplotlib figure produced by visualize_grid for the final
         environment state, including the visited path.
     """
     state = env.reset()
     # Snapshot positions as tuples: if the environment updates agent_pos in
     # place (GridWorld is not visible here - confirm), storing the object
     # itself would make every path entry alias the final position.
     path = [tuple(env.agent_pos)]
     for _ in range(int(steps)):
         action = agent.choose_action(state)
         next_state, reward, done, _ = env.step(action)
         agent.learn(state, action, reward, next_state)
         state = next_state
         path.append(tuple(env.agent_pos))
         if done:
             break
     return visualize_grid(env.agent_pos, env.goal_pos, env.obstacles, path)
 # Gradio widgets: a step-count slider as input, a plot for the rendered grid
 input_steps = gr.Slider(
     minimum=1,
     maximum=1000,
     value=100,
     label="Number of Training Steps",
 )
 output_grid = gr.Plot(label="Grid World")
 # Define the Gradio interface function
 def update_grid(steps):