broadfield-dev committed on
Commit 4e6ab50 · verified · 1 Parent(s): 709dfbe

Update app.py

Files changed (1):
  app.py +11 -106
app.py CHANGED
@@ -1,110 +1,15 @@
- import numpy as np
- import gradio as gr
- import pandas as pd
-
- # Define the grid world environment
- class GridWorld:
-     def __init__(self, size=4):
-         self.size = size
-         self.agent_pos = [0, 0]
-         self.goal_pos = [size-1, size-1]
-         self.obstacles = [(1, 1), (2, 2)]
-
-     def reset(self):
-         self.agent_pos = [0, 0]
-         return self.agent_pos
-
-     def step(self, action):
-         x, y = self.agent_pos
-
-         if action == 0:  # Up
-             x = max(0, x - 1)
-         elif action == 1:  # Down
-             x = min(self.size - 1, x + 1)
-         elif action == 2:  # Left
-             y = max(0, y - 1)
-         elif action == 3:  # Right
-             y = min(self.size - 1, y + 1)
-
-         self.agent_pos = [x, y]
-
-         if tuple(self.agent_pos) in self.obstacles:
-             return self.agent_pos, -10, False, {}
-         elif self.agent_pos == self.goal_pos:
-             return self.agent_pos, 10, True, {}
-         else:
-             return self.agent_pos, -1, False, {}
-
- # Define the RL agent
- class QLearningAgent:
-     def __init__(self, env, alpha=0.1, gamma=0.9, epsilon=0.1):
-         self.env = env
-         self.alpha = alpha
-         self.gamma = gamma
-         self.epsilon = epsilon
-         self.q_table = np.zeros((env.size, env.size, 4))
-
-     def choose_action(self, state):
-         if np.random.uniform(0, 1) < self.epsilon:
-             return np.random.choice(4)
-         else:
-             return np.argmax(self.q_table[state[0], state[1]])
-
-     def learn(self, state, action, reward, next_state):
-         best_next_action = np.argmax(self.q_table[next_state[0], next_state[1]])
-         td_target = reward + self.gamma * self.q_table[next_state[0], next_state[1], best_next_action]
-         td_error = td_target - self.q_table[state[0], state[1], action]
-         self.q_table[state[0], state[1], action] += self.alpha * td_error
-
- # Initialize the environment and agent
- env = GridWorld()
- agent = QLearningAgent(env)
-
- def visualize_grid(agent_pos, goal_pos, obstacles, path=None):
-     grid = np.zeros((env.size, env.size), dtype=str)
-     grid[agent_pos[0], agent_pos[1]] = 'A'
-     grid[goal_pos[0], goal_pos[1]] = 'G'
-     for obstacle in obstacles:
-         grid[obstacle[0], obstacle[1]] = 'X'
-
-     if path:
-         for step in path:
-             grid[step[0], step[1]] = 'P'
-
-     # Convert grid to a DataFrame for Gradio
-     grid_df = pd.DataFrame(grid, index=range(env.size), columns=range(env.size))
-     return grid_df
-
- def train_agent(steps=100):
-     state = env.reset()
-     path = [env.agent_pos]
-     for _ in range(steps):
-         action = agent.choose_action(state)
-         next_state, reward, done, _ = env.step(action)
-         agent.learn(state, action, reward, next_state)
-         state = next_state
-         path.append(env.agent_pos)
-         if done:
-             break
-     grid_df = visualize_grid(env.agent_pos, env.goal_pos, env.obstacles, path)
-     return grid_df
-
- # Create the Gradio interface
- input_steps = gr.Slider(1, 1000, value=100, label="Number of Training Steps")
- output_grid = gr.Dataframe(label="Grid World")
-
- # Define the Gradio interface function
- def update_grid(steps):
-     return train_agent(steps)
-
- # Create the Gradio interface
- iface = gr.Interface(
-     fn=update_grid,
-     inputs=[input_steps],
-     outputs=[output_grid],
-     title="Reinforcement Learning with Grid World",
-     description="Train a Q-learning agent to navigate a grid world and visualize the results."
- )
-
- # Launch the interface
- iface.launch()
+ # app.py
+
+ from flask import Flask
+
+ # Create a Flask application instance
+ app = Flask(__name__)
+
+ # Define a route for the home page
+ @app.route('/')
+ def home():
+     return "Hello, Flask on Port 7860!"
+
+ # Run the Flask app on port 7860
+ if __name__ == '__main__':
+     app.run(port=7860)
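
The new route can be exercised without opening a port by using Flask's built-in test client. A minimal sketch, assuming the file above is saved as app.py on the import path (check_app.py is a hypothetical helper, not part of this commit):

# check_app.py -- hypothetical helper, not part of the commit
from app import app  # the Flask instance defined in app.py

# Flask's test client issues requests in-process; no server is started
client = app.test_client()
response = client.get("/")

print(response.status_code)             # expected: 200
print(response.get_data(as_text=True))  # expected: Hello, Flask on Port 7860!

One note on the port choice: 7860 is the default port a Hugging Face Space expects the app to listen on. If this file is meant to serve a Space directly, the dev server usually also needs to bind to all interfaces, e.g. app.run(host="0.0.0.0", port=7860); as written it listens only on localhost.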