Spaces:

broadfield-dev
/

RL

Sleeping

App Files Files Community

RL / app.py

broadfield-dev

Update app.py

b7ed5ce verified 6 months ago

raw

history blame contribute delete

2.54 kB

	# app.py

	from flask import Flask, jsonify, render_template
	import numpy as np

	# Flask application object; the route decorators further down register
	# their handlers on it.
	app = Flask(__name__)

	# Maze layout, indexed as maze[row, col]. Cells equal to 1 are the ones
	# get_reward() penalises with -10 (presumably walls); all others are 0.
	maze = np.array([
	    [0, 1, 0, 0, 0],
	    [0, 1, 0, 1, 0],
	    [0, 0, 0, 1, 0],
	    [0, 1, 1, 1, 0],
	    [0, 0, 0, 0, 0],
	])

	# Goal cell as (row, col); a training episode ends when it is reached.
	target = (4, 4)

	# Current agent cell as (row, col); the /step route advances it.
	agent_position = (0, 0)

	# Q-table with one value per (row, col, action). The grid dimensions are
	# taken from the maze itself so the table cannot drift out of sync with
	# the layout; the last axis holds the 4 actions (up, down, left, right).
	q_table = np.zeros(maze.shape + (4,))

	# Define the reward function
	def get_reward(agent_position):
	    """Return the reward for the agent occupying ``agent_position``.

	    Args:
	        agent_position: ``(row, col)`` tuple; it is compared with
	            ``target`` and used to index ``maze``.

	    Returns:
	        100 when the position equals ``target``, -10 when the maze cell at
	        that position is 1, and -1 for every other cell.
	    """
	    # The goal check comes first, so the maze is only consulted for
	    # non-goal cells.
	    if agent_position == target:
	        return 100
	    cell_is_penalised = maze[agent_position] == 1
	    return -10 if cell_is_penalised else -1

	# Define the action space
	# Maps an action id to the (d_row, d_col) offset added to a (row, col) state.
	actions = dict(enumerate((
	    (-1, 0),  # 0: up
	    (1, 0),   # 1: down
	    (0, -1),  # 2: left
	    (0, 1),   # 3: right
	)))

	# Define the training function
	def train_q_table(episodes=1000, alpha=0.1, gamma=0.95, epsilon=0.1):
	    """Train the module-level ``q_table`` with tabular Q-learning.

	    Every episode starts at ``(0, 0)`` and runs until the state equals
	    ``target``. Actions are picked epsilon-greedily, a move that would
	    leave the grid keeps the agent in its current cell, and each step
	    applies the update
	    ``Q[s, a] = (1 - alpha) * Q[s, a] + alpha * (r + gamma * max(Q[s']))``.

	    Args:
	        episodes: Number of episodes to run.
	        alpha: Learning rate; weight given to the new estimate.
	        gamma: Discount factor applied to the best next-state value.
	        epsilon: Probability of taking a uniformly random action instead
	            of the current greedy one.

	    Side effects:
	        Updates ``q_table`` in place and resets the global
	        ``agent_position`` to ``(0, 0)`` once training has finished.
	    """
	    # Only agent_position is rebound; q_table is mutated in place and
	    # therefore needs no global declaration.
	    global agent_position

	    # Take the grid limits and the action ids from the data they describe
	    # instead of repeating literal 5s and [0, 1, 2, 3].
	    n_rows, n_cols = maze.shape
	    action_ids = list(actions)

	    for _ in range(episodes):
	        state = (0, 0)
	        while state != target:
	            if np.random.uniform(0, 1) < epsilon:
	                action = np.random.choice(action_ids)
	            else:
	                action = np.argmax(q_table[state])

	            # Plain tuple arithmetic avoids allocating two arrays per step
	            # and keeps states as ordinary Python ints.
	            d_row, d_col = actions[action]
	            row, col = state[0] + d_row, state[1] + d_col
	            if 0 <= row < n_rows and 0 <= col < n_cols:
	                next_state = (row, col)
	            else:
	                # Stay in the same state if the move leaves the grid.
	                next_state = state

	            reward = get_reward(next_state)
	            old_value = q_table[state + (action,)]
	            next_max = np.max(q_table[next_state])

	            new_value = (1 - alpha) * old_value + alpha * (reward + gamma * next_max)
	            q_table[state + (action,)] = new_value

	            state = next_state

	    agent_position = (0, 0)

	# Train the Q-table
	# This call sits at module level, so training runs with the default
	# hyperparameters as soon as the module is imported or executed.
	train_q_table()

	@app.route('/')
	def index():
	    """Render and return the ``index.html`` template for the root URL."""
	    page = render_template('index.html')
	    return page

	@app.route('/step')
	def step():
	    """Advance the agent by one greedy step and report the state as JSON.

	    The action with the highest Q-value for the current cell is applied.
	    A move that would leave the grid is ignored, so the agent stays where
	    it is. The global ``agent_position`` is updated as a side effect.

	    Returns:
	        A JSON response with ``agent_position`` and ``target`` as
	        ``[row, col]`` lists and ``maze`` as a nested list.
	    """
	    global agent_position

	    # Derive the grid limits from the maze instead of repeating literal 5s.
	    n_rows, n_cols = maze.shape

	    # Greedy policy: best-valued action for the current cell. int() turns
	    # NumPy's integer scalar into a plain Python int.
	    action = int(np.argmax(q_table[agent_position]))
	    d_row, d_col = actions[action]
	    row = int(agent_position[0]) + d_row
	    col = int(agent_position[1]) + d_col

	    # Only commit the move when it stays inside the grid.
	    if 0 <= row < n_rows and 0 <= col < n_cols:
	        agent_position = (row, col)

	    # NumPy integer scalars (e.g. np.int64) are not JSON serializable, and
	    # list() alone does not convert them, so emit plain Python ints.
	    return jsonify({
	        'agent_position': [int(coord) for coord in agent_position],
	        'target': list(target),
	        'maze': maze.tolist()
	    })

	# Start Flask's built-in server only when this file is run as a script;
	# host 0.0.0.0 makes it listen on all network interfaces.
	# NOTE(review): port 7860 is presumably what the hosting platform expects — confirm.
	if __name__ == '__main__':
	app.run(host='0.0.0.0', port=7860)