Spaces:
Sleeping
Sleeping
# app.py | |
from flask import Flask, jsonify, render_template | |
import numpy as np | |
app = Flask(__name__) | |
# Define the maze | |
maze = np.array([ | |
[0, 1, 0, 0, 0], | |
[0, 1, 0, 1, 0], | |
[0, 0, 0, 1, 0], | |
[0, 1, 1, 1, 0], | |
[0, 0, 0, 0, 0] | |
]) | |
# Define the target | |
target = (4, 4) | |
# Initialize the agent's position | |
agent_position = (0, 0) | |
# Define the Q-table | |
q_table = np.zeros((5, 5, 4)) # 5x5 grid, 4 actions (up, down, left, right) | |
# Define the reward function | |
def get_reward(agent_position): | |
if agent_position == target: | |
return 100 | |
elif maze[agent_position] == 1: | |
return -10 | |
return -1 | |
# Define the action space | |
actions = { | |
0: (-1, 0), # up | |
1: (1, 0), # down | |
2: (0, -1), # left | |
3: (0, 1) # right | |
} | |
# Define the training function | |
def train_q_table(episodes=1000, alpha=0.1, gamma=0.95, epsilon=0.1): | |
global q_table, agent_position | |
for _ in range(episodes): | |
state = (0, 0) | |
while state != target: | |
if np.random.uniform(0, 1) < epsilon: | |
action = np.random.choice([0, 1, 2, 3]) | |
else: | |
action = np.argmax(q_table[state]) | |
next_state = tuple(np.array(state) + np.array(actions[action])) | |
if next_state[0] < 0 or next_state[0] >= 5 or next_state[1] < 0 or next_state[1] >= 5: | |
next_state = state # Stay in the same state if the move is invalid | |
reward = get_reward(next_state) | |
old_value = q_table[state + (action,)] | |
next_max = np.max(q_table[next_state]) | |
new_value = (1 - alpha) * old_value + alpha * (reward + gamma * next_max) | |
q_table[state + (action,)] = new_value | |
state = next_state | |
agent_position = (0, 0) | |
# Train the Q-table | |
train_q_table() | |
def index(): | |
return render_template('index.html') | |
def step(): | |
global agent_position | |
action = np.argmax(q_table[agent_position]) | |
next_state = tuple(np.array(agent_position) + np.array(actions[action])) | |
if next_state[0] < 0 or next_state[0] >= 5 or next_state[1] < 0 or next_state[1] >= 5: | |
next_state = agent_position # Stay in the same state if the move is invalid | |
agent_position = next_state | |
# Convert NumPy arrays to lists to ensure JSON serializability | |
return jsonify({ | |
'agent_position': list(agent_position), | |
'target': list(target), | |
'maze': maze.tolist() | |
}) | |
if __name__ == '__main__': | |
app.run(host='0.0.0.0', port=7860) |