broadfield-dev committed on
Commit
dc75a1c
·
verified ·
1 Parent(s): 2f18fbc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -6
app.py CHANGED
@@ -1,15 +1,92 @@
1
  # app.py
2
 
3
- from flask import Flask
 
4
 
5
- # Create a Flask application instance
6
  app = Flask(__name__)
7
 
8
- # Define a route for the home page
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  @app.route('/')
10
- def home():
11
- return "Hello, Flask on Port 7860!"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- # Run the Flask app on port 7860
14
  if __name__ == '__main__':
15
  app.run(host='0.0.0.0', port=7860)
 
# app.py
# Flask app that trains a tabular Q-learning agent on a tiny 5x5 maze at
# import time and serves the agent's state over HTTP.

from flask import Flask, jsonify, render_template
import numpy as np

app = Flask(__name__)

# Define the maze: 5x5 grid, 0 = open cell, 1 = wall.
maze = np.array([
    [0, 1, 0, 0, 0],
    [0, 1, 0, 1, 0],
    [0, 0, 0, 1, 0],
    [0, 1, 1, 1, 0],
    [0, 0, 0, 0, 0]
])

# Define the target: (row, col) goal cell the agent tries to reach.
target = (4, 4)

# Initialize the agent's position: (row, col), mutated by /step and reset
# after training.
agent_position = (0, 0)

# Define the Q-table: one value per (row, col, action).
q_table = np.zeros((5, 5, 4)) # 5x5 grid, 4 actions (up, down, left, right)
# Reward function used by the Q-learning update.
def get_reward(agent_position):
    """Reward for occupying a cell.

    +100 at the target, -10 for a wall cell, and -1 for any other cell
    (the per-step penalty nudges the agent toward shorter paths).
    """
    reached_goal = agent_position == target
    if reached_goal:
        return 100
    return -10 if maze[agent_position] == 1 else -1
34
+ # Define the action space
35
+ actions = {
36
+ 0: (-1, 0), # up
37
+ 1: (1, 0), # down
38
+ 2: (0, -1), # left
39
+ 3: (0, 1) # right
40
+ }
41
+
# Training routine: fills in the shared Q-table.
def train_q_table(episodes=1000, alpha=0.1, gamma=0.95, epsilon=0.1):
    """Train the agent with tabular Q-learning.

    Each episode starts at (0, 0) and runs until the target cell is reached,
    applying the standard update
    Q(s,a) <- (1 - alpha) * Q(s,a) + alpha * (r + gamma * max_a' Q(s',a')).

    Args:
        episodes: number of training episodes.
        alpha: learning rate.
        gamma: discount factor.
        epsilon: probability of taking a random (exploratory) action.
    """
    global q_table, agent_position

    for _ in range(episodes):
        state = (0, 0)
        while state != target:
            # Epsilon-greedy action selection.
            explore = np.random.uniform(0, 1) < epsilon
            if explore:
                action = np.random.choice([0, 1, 2, 3])
            else:
                action = np.argmax(q_table[state])

            # Apply the move; an off-grid move leaves the state unchanged.
            candidate = tuple(np.array(state) + np.array(actions[action]))
            in_bounds = 0 <= candidate[0] < 5 and 0 <= candidate[1] < 5
            next_state = candidate if in_bounds else state

            # Q-learning update (same arithmetic form as the classic rule).
            reward = get_reward(next_state)
            old_value = q_table[state + (action,)]
            best_future = np.max(q_table[next_state])
            q_table[state + (action,)] = (
                (1 - alpha) * old_value + alpha * (reward + gamma * best_future)
            )

            state = next_state

    # Reset the agent so /step replays the learned policy from the start.
    agent_position = (0, 0)


# Train the Q-table once at import time so routes serve a ready policy.
train_q_table()
@app.route('/')
def index():
    # Serve the front-end page; requires templates/index.html to exist
    # (not visible in this file — confirm it ships with the app).
    return render_template('index.html')
@app.route('/step')
def step():
    """Advance the agent one greedy step along the learned policy.

    Returns:
        JSON with the agent's new position, the target cell, and the maze
        layout so the client can redraw the grid.
    """
    global agent_position

    # Greedy action: highest Q-value for the current cell (no exploration).
    action = int(np.argmax(q_table[agent_position]))
    d_row, d_col = actions[action]
    next_state = (agent_position[0] + d_row, agent_position[1] + d_col)

    # Ignore moves that would leave the 5x5 grid: stay in place instead.
    if not (0 <= next_state[0] < 5 and 0 <= next_state[1] < 5):
        next_state = agent_position

    # BUG FIX: the previous implementation built next_state from np.array
    # arithmetic, producing np.int64 components that Flask's jsonify cannot
    # serialize (TypeError). Compute with plain ints and cast defensively.
    agent_position = (int(next_state[0]), int(next_state[1]))

    return jsonify({'agent_position': agent_position, 'target': target, 'maze': maze.tolist()})
# Run the Flask development server on all interfaces, port 7860.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)