Update app.py
app.py CHANGED
@@ -1,15 +1,92 @@
 # app.py
 
-from flask import Flask
+from flask import Flask, jsonify, render_template
+import numpy as np
 
-# Create a Flask application instance
 app = Flask(__name__)
 
-# Define
+# Define the maze
+maze = np.array([
+    [0, 1, 0, 0, 0],
+    [0, 1, 0, 1, 0],
+    [0, 0, 0, 1, 0],
+    [0, 1, 1, 1, 0],
+    [0, 0, 0, 0, 0]
+])
+
+# Define the target
+target = (4, 4)
+
+# Initialize the agent's position
+agent_position = (0, 0)
+
+# Define the Q-table
+q_table = np.zeros((5, 5, 4))  # 5x5 grid, 4 actions (up, down, left, right)
+
+# Define the reward function
+def get_reward(agent_position):
+    if agent_position == target:
+        return 100
+    elif maze[agent_position] == 1:
+        return -10
+    return -1
+
+# Define the action space
+actions = {
+    0: (-1, 0),  # up
+    1: (1, 0),   # down
+    2: (0, -1),  # left
+    3: (0, 1)    # right
+}
+
+# Define the training function
+def train_q_table(episodes=1000, alpha=0.1, gamma=0.95, epsilon=0.1):
+    global q_table, agent_position
+
+    for _ in range(episodes):
+        state = (0, 0)
+        while state != target:
+            if np.random.uniform(0, 1) < epsilon:
+                action = np.random.choice([0, 1, 2, 3])
+            else:
+                action = np.argmax(q_table[state])
+
+            next_state = tuple(np.array(state) + np.array(actions[action]))
+
+            if next_state[0] < 0 or next_state[0] >= 5 or next_state[1] < 0 or next_state[1] >= 5:
+                next_state = state  # Stay in the same state if the move is invalid
+
+            reward = get_reward(next_state)
+            old_value = q_table[state + (action,)]
+            next_max = np.max(q_table[next_state])
+
+            new_value = (1 - alpha) * old_value + alpha * (reward + gamma * next_max)
+            q_table[state + (action,)] = new_value
+
+            state = next_state
+
+    agent_position = (0, 0)
+
+# Train the Q-table
+train_q_table()
+
 @app.route('/')
-def
-return
+def index():
+    return render_template('index.html')
+
+@app.route('/step')
+def step():
+    global agent_position
+
+    action = np.argmax(q_table[agent_position])
+    next_state = tuple(np.array(agent_position) + np.array(actions[action]))
+
+    if next_state[0] < 0 or next_state[0] >= 5 or next_state[1] < 0 or next_state[1] >= 5:
+        next_state = agent_position  # Stay in the same state if the move is invalid
+
+    agent_position = next_state
+
+    return jsonify({'agent_position': agent_position, 'target': target, 'maze': maze.tolist()})
 
-# Run the Flask app on port 7860
 if __name__ == '__main__':
     app.run(host='0.0.0.0', port=7860)
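
Since train_q_table() now runs at import time, it can be worth sanity-checking that the learned greedy policy actually reaches the target before the routes rely on it. The sketch below is a hypothetical check, not part of the commit: it assumes it runs in the same module after training (so target, actions and q_table are in scope) and simply walks the argmax action from (0, 0) with a step cap.

# Hypothetical sanity check (not in the diff above): follow the greedy policy
# from the start cell and confirm it reaches the target within a step budget.
state, path = (0, 0), [(0, 0)]
for _ in range(25):  # cap the walk so an undertrained policy cannot loop forever
    if state == target:
        break
    move = actions[int(np.argmax(q_table[state]))]
    nxt = (state[0] + move[0], state[1] + move[1])
    if not (0 <= nxt[0] < 5 and 0 <= nxt[1] < 5):
        break  # greedy action walked off the grid; training likely needs more episodes
    state = nxt
    path.append(state)
print(path)  # with the default hyperparameters this usually ends at (4, 4)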
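
The new endpoint can also be exercised without the browser front end via Flask's built-in test client. The snippet below is a sketch under two assumptions: the module is importable as app (true when the file is saved as app.py next to the test) and templates/index.html ships elsewhere in the Space, so only /step is hit here.

# smoke_test.py -- hypothetical helper, not part of the Space itself.
# Hits /step in-process through Flask's test client.
from app import app  # importing app.py also trains the Q-table

with app.test_client() as client:
    response = client.get('/step')
    print(response.status_code)        # expect 200 if serialization succeeds
    if response.status_code == 200:
        data = response.get_json()
        print(data['agent_position'])  # greedy move chosen from the Q-table
        print(data['target'])          # [4, 4] once encoded as JSON
        print(len(data['maze']))       # 5 rows of the maze grid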
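
One caveat, hedged because the behaviour depends on the Flask version and its JSON provider: the tuple(np.array(...) + np.array(...)) arithmetic in /step leaves numpy integer scalars in agent_position, and Flask's default JSON encoding typically rejects those with a TypeError, so the smoke test above may see a 500 on the first call. A possible adjustment, sketched here rather than folded into the diff, is to cast back to plain ints before handing the tuple to jsonify.

# Sketch of a possible follow-up (an assumption, not part of the commit above):
# convert numpy integer scalars to built-in ints so jsonify can encode them.
import numpy as np

def to_builtin(coords):
    """Turn a tuple of numpy scalars into plain Python ints."""
    return tuple(int(c) for c in coords)

next_state = tuple(np.array((0, 0)) + np.array((0, 1)))  # numpy integer scalars
print(to_builtin(next_state))  # (0, 1) -- safe to pass to jsonify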