# app.py
"""Flask demo: a tabular Q-learning agent walking a small grid maze.

The Q-table is trained once when this module is imported.  Afterwards each
request to ``/step`` advances the agent by one greedy move and returns the
agent position, the target and the maze as JSON.
"""
from flask import Flask, jsonify, render_template
import numpy as np

app = Flask(__name__)

# Define the maze.  Cells marked 1 are penalised by get_reward() (-10) but
# nothing prevents the agent from entering them.
maze = np.array([
    [0, 1, 0, 0, 0],
    [0, 1, 0, 1, 0],
    [0, 0, 0, 1, 0],
    [0, 1, 1, 1, 0],
    [0, 0, 0, 0, 0],
])

# Define the target
target = (4, 4)

# Cell where every training episode (and the served agent) starts
_START = (0, 0)

# Initialize the agent's position
agent_position = _START

# Define the action space as (row delta, column delta)
actions = {
    0: (-1, 0),  # up
    1: (1, 0),   # down
    2: (0, -1),  # left
    3: (0, 1),   # right
}

# Define the Q-table: one value per (row, column, action)
q_table = np.zeros(maze.shape + (len(actions),))


# Define the reward function
def get_reward(agent_position):
    """Return the reward for arriving at ``agent_position``.

    Args:
        agent_position: ``(row, column)`` tuple indexing into ``maze``.

    Returns:
        100 at the target, -10 on a cell marked 1 in ``maze``, -1 otherwise.
    """
    if agent_position == target:
        return 100
    elif maze[agent_position] == 1:
        return -10
    return -1


def _move(position, action):
    """Return the cell reached by applying ``action`` to ``position``.

    A move that would leave the grid keeps the agent where it is.  The result
    is always a tuple of built-in ``int`` (never NumPy scalars) so it compares
    equal to ``target`` and can be passed to ``jsonify`` unchanged.
    """
    d_row, d_col = actions[int(action)]
    row, col = int(position[0]) + d_row, int(position[1]) + d_col
    n_rows, n_cols = maze.shape
    if 0 <= row < n_rows and 0 <= col < n_cols:
        return (row, col)
    # Stay in the same state if the move is invalid
    return (int(position[0]), int(position[1]))


# Define the training function
def train_q_table(episodes=1000, alpha=0.1, gamma=0.95, epsilon=0.1):
    """Fill ``q_table`` in place using epsilon-greedy Q-learning.

    Each episode starts at (0, 0) and runs until the target is reached.
    When training finishes, the global ``agent_position`` is reset to (0, 0).

    Args:
        episodes: Number of episodes to run.
        alpha: Learning rate used to blend the old and new Q-value.
        gamma: Discount applied to the best Q-value of the next state.
        epsilon: Probability of taking a uniformly random action.
    """
    global agent_position
    action_ids = list(actions)
    for _ in range(episodes):
        state = _START
        while state != target:
            if np.random.uniform(0, 1) < epsilon:
                action = int(np.random.choice(action_ids))
            else:
                action = int(np.argmax(q_table[state]))

            next_state = _move(state, action)
            reward = get_reward(next_state)

            old_value = q_table[state + (action,)]
            next_max = np.max(q_table[next_state])
            q_table[state + (action,)] = (
                (1 - alpha) * old_value + alpha * (reward + gamma * next_max)
            )
            state = next_state
    agent_position = _START


# Train the Q-table
train_q_table()


@app.route('/')
def index():
    """Serve the front-end page."""
    return render_template('index.html')


@app.route('/step')
def step():
    """Advance the agent by one greedy move and return the state as JSON."""
    global agent_position
    # The Q-values of the target cell are never updated during training, so
    # following them would walk the agent back off the target.  Hold instead.
    if agent_position != target:
        action = int(np.argmax(q_table[agent_position]))
        agent_position = _move(agent_position, action)
    # Emit built-in ints/lists only: NumPy scalars such as np.int64 are not
    # JSON serializable by jsonify.
    return jsonify({
        'agent_position': [int(c) for c in agent_position],
        'target': list(target),
        'maze': maze.tolist(),
    })


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)