broadfield-dev committed on
Commit 4e6ab50 · verified · 1 Parent(s): 709dfbe

Update app.py

Files changed (1):
  app.py +11 -106
app.py CHANGED
@@ -1,110 +1,15 @@
- import numpy as np
- import gradio as gr
- import pandas as pd
-
- # Define the grid world environment
- class GridWorld:
-     def __init__(self, size=4):
-         self.size = size
-         self.agent_pos = [0, 0]
-         self.goal_pos = [size-1, size-1]
-         self.obstacles = [(1, 1), (2, 2)]
-
-     def reset(self):
-         self.agent_pos = [0, 0]
-         return self.agent_pos
-
-     def step(self, action):
-         x, y = self.agent_pos
-
-         if action == 0:  # Up
-             x = max(0, x - 1)
-         elif action == 1:  # Down
-             x = min(self.size - 1, x + 1)
-         elif action == 2:  # Left
-             y = max(0, y - 1)
-         elif action == 3:  # Right
-             y = min(self.size - 1, y + 1)
-
-         self.agent_pos = [x, y]
-
-         if tuple(self.agent_pos) in self.obstacles:
-             return self.agent_pos, -10, False, {}
-         elif self.agent_pos == self.goal_pos:
-             return self.agent_pos, 10, True, {}
-         else:
-             return self.agent_pos, -1, False, {}
-
- # Define the RL agent
- class QLearningAgent:
-     def __init__(self, env, alpha=0.1, gamma=0.9, epsilon=0.1):
-         self.env = env
-         self.alpha = alpha
-         self.gamma = gamma
-         self.epsilon = epsilon
-         self.q_table = np.zeros((env.size, env.size, 4))
-
-     def choose_action(self, state):
-         if np.random.uniform(0, 1) < self.epsilon:
-             return np.random.choice(4)
-         else:
-             return np.argmax(self.q_table[state[0], state[1]])
-
-     def learn(self, state, action, reward, next_state):
-         best_next_action = np.argmax(self.q_table[next_state[0], next_state[1]])
-         td_target = reward + self.gamma * self.q_table[next_state[0], next_state[1], best_next_action]
-         td_error = td_target - self.q_table[state[0], state[1], action]
-         self.q_table[state[0], state[1], action] += self.alpha * td_error
-
- # Initialize the environment and agent
- env = GridWorld()
- agent = QLearningAgent(env)
-
- def visualize_grid(agent_pos, goal_pos, obstacles, path=None):
-     grid = np.zeros((env.size, env.size), dtype=str)
-     grid[agent_pos[0], agent_pos[1]] = 'A'
-     grid[goal_pos[0], goal_pos[1]] = 'G'
-     for obstacle in obstacles:
-         grid[obstacle[0], obstacle[1]] = 'X'
-
-     if path:
-         for step in path:
-             grid[step[0], step[1]] = 'P'
-
-     # Convert grid to a DataFrame for Gradio
-     grid_df = pd.DataFrame(grid, index=range(env.size), columns=range(env.size))
-     return grid_df
-
- def train_agent(steps=100):
-     state = env.reset()
-     path = [env.agent_pos]
-     for _ in range(steps):
-         action = agent.choose_action(state)
-         next_state, reward, done, _ = env.step(action)
-         agent.learn(state, action, reward, next_state)
-         state = next_state
-         path.append(env.agent_pos)
-         if done:
-             break
-     grid_df = visualize_grid(env.agent_pos, env.goal_pos, env.obstacles, path)
-     return grid_df
-
- # Create the Gradio interface
- input_steps = gr.Slider(1, 1000, value=100, label="Number of Training Steps")
- output_grid = gr.Dataframe(label="Grid World")
-
- # Define the Gradio interface function
- def update_grid(steps):
-     return train_agent(steps)
-
- # Create the Gradio interface
- iface = gr.Interface(
-     fn=update_grid,
-     inputs=[input_steps],
-     outputs=[output_grid],
-     title="Reinforcement Learning with Grid World",
-     description="Train a Q-learning agent to navigate a grid world and visualize the results."
- )
-
- # Launch the interface
- iface.launch()
+ # app.py
+
+ from flask import Flask
+
+ # Create a Flask application instance
+ app = Flask(__name__)
+
+ # Define a route for the home page
+ @app.route('/')
+ def home():
+     return "Hello, Flask on Port 7860!"
+
+ # Run the Flask app on port 7860
+ if __name__ == '__main__':
+     app.run(port=7860)
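
The new route can be exercised without opening a port by using Flask's built-in test client. A minimal sketch, assuming the file above is saved as app.py on the import path (check_app.py is a hypothetical helper, not part of this commit):

# check_app.py -- hypothetical helper, not part of the commit
from app import app  # the Flask instance defined in app.py

# Flask's test client issues requests in-process; no server is started
client = app.test_client()
response = client.get("/")

print(response.status_code)             # expected: 200
print(response.get_data(as_text=True))  # expected: Hello, Flask on Port 7860!

One note on the port choice: 7860 is the default port a Hugging Face Space expects the app to listen on. If this file is meant to serve a Space directly, the dev server usually also needs to bind to all interfaces, e.g. app.run(host="0.0.0.0", port=7860); as written it listens only on localhost.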