broadfield-dev committed on
Commit
dc75a1c
·
verified ·
1 Parent(s): 2f18fbc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -6
app.py CHANGED
@@ -1,15 +1,92 @@
1
  # app.py
2
 
3
- from flask import Flask
 
4
 
5
- # Create a Flask application instance
6
  app = Flask(__name__)
7
 
8
- # Define a route for the home page
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  @app.route('/')
10
- def home():
11
- return "Hello, Flask on Port 7860!"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- # Run the Flask app on port 7860
14
  if __name__ == '__main__':
15
  app.run(host='0.0.0.0', port=7860)
 
# app.py
# Flask app that trains a tabular Q-learning agent on a tiny 5x5 maze at
# import time and serves the agent's state over HTTP.

from flask import Flask, jsonify, render_template
import numpy as np

app = Flask(__name__)

# Define the maze: 5x5 grid, 0 = open cell, 1 = wall.
maze = np.array([
    [0, 1, 0, 0, 0],
    [0, 1, 0, 1, 0],
    [0, 0, 0, 1, 0],
    [0, 1, 1, 1, 0],
    [0, 0, 0, 0, 0]
])

# Define the target: (row, col) goal cell the agent tries to reach.
target = (4, 4)

# Initialize the agent's position: (row, col), mutated by /step and reset
# after training.
agent_position = (0, 0)

# Define the Q-table: one value per (row, col, action).
q_table = np.zeros((5, 5, 4)) # 5x5 grid, 4 actions (up, down, left, right)
# Reward function used by the Q-learning update.
def get_reward(agent_position):
    """Reward for occupying a cell.

    +100 at the target, -10 for a wall cell, and -1 for any other cell
    (the per-step penalty nudges the agent toward shorter paths).
    """
    reached_goal = agent_position == target
    if reached_goal:
        return 100
    return -10 if maze[agent_position] == 1 else -1
34
+ # Define the action space
35
+ actions = {
36
+ 0: (-1, 0), # up
37
+ 1: (1, 0), # down
38
+ 2: (0, -1), # left
39
+ 3: (0, 1) # right
40
+ }
41
+
# Training routine: fills in the shared Q-table.
def train_q_table(episodes=1000, alpha=0.1, gamma=0.95, epsilon=0.1):
    """Train the agent with tabular Q-learning.

    Each episode starts at (0, 0) and runs until the target cell is reached,
    applying the standard update
    Q(s,a) <- (1 - alpha) * Q(s,a) + alpha * (r + gamma * max_a' Q(s',a')).

    Args:
        episodes: number of training episodes.
        alpha: learning rate.
        gamma: discount factor.
        epsilon: probability of taking a random (exploratory) action.
    """
    global q_table, agent_position

    for _ in range(episodes):
        state = (0, 0)
        while state != target:
            # Epsilon-greedy action selection.
            explore = np.random.uniform(0, 1) < epsilon
            if explore:
                action = np.random.choice([0, 1, 2, 3])
            else:
                action = np.argmax(q_table[state])

            # Apply the move; an off-grid move leaves the state unchanged.
            candidate = tuple(np.array(state) + np.array(actions[action]))
            in_bounds = 0 <= candidate[0] < 5 and 0 <= candidate[1] < 5
            next_state = candidate if in_bounds else state

            # Q-learning update (same arithmetic form as the classic rule).
            reward = get_reward(next_state)
            old_value = q_table[state + (action,)]
            best_future = np.max(q_table[next_state])
            q_table[state + (action,)] = (
                (1 - alpha) * old_value + alpha * (reward + gamma * best_future)
            )

            state = next_state

    # Reset the agent so /step replays the learned policy from the start.
    agent_position = (0, 0)


# Train the Q-table once at import time so routes serve a ready policy.
train_q_table()
@app.route('/')
def index():
    # Serve the front-end page; requires templates/index.html to exist
    # (not visible in this file — confirm it ships with the app).
    return render_template('index.html')
@app.route('/step')
def step():
    """Advance the agent one greedy step along the learned policy.

    Returns:
        JSON with the agent's new position, the target cell, and the maze
        layout so the client can redraw the grid.
    """
    global agent_position

    # Greedy action: highest Q-value for the current cell (no exploration).
    action = int(np.argmax(q_table[agent_position]))
    d_row, d_col = actions[action]
    next_state = (agent_position[0] + d_row, agent_position[1] + d_col)

    # Ignore moves that would leave the 5x5 grid: stay in place instead.
    if not (0 <= next_state[0] < 5 and 0 <= next_state[1] < 5):
        next_state = agent_position

    # BUG FIX: the previous implementation built next_state from np.array
    # arithmetic, producing np.int64 components that Flask's jsonify cannot
    # serialize (TypeError). Compute with plain ints and cast defensively.
    agent_position = (int(next_state[0]), int(next_state[1]))

    return jsonify({'agent_position': agent_position, 'target': target, 'maze': maze.tolist()})
# Run the Flask development server on all interfaces, port 7860.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)