Spaces:

XDimLab
/

hugsim_web_server_0

Paused

App Files Files Community

hugsim_web_server_0 / web_server.py

Libra-1995

fix: lock

a125da9 3 months ago

raw

history blame

18.6 kB

	import sys
	import os
	import pickle
	import json
	import threading
	import io
	import enum
	import hugsim_env
	import subprocess as sp
	from collections import deque, OrderedDict
	from datetime import datetime
	from typing import Any, Dict, Optional, List
	from dataclasses import dataclass
	sys.path.append(os.getcwd())

	from fastapi import FastAPI, Body, Header, Depends, HTTPException, Query
	from fastapi.responses import HTMLResponse, Response
	from omegaconf import OmegaConf, DictConfig
	from huggingface_hub import HfApi
	import open3d as o3d
	import numpy as np
	import gymnasium
	import uvicorn
	import psutil
	import torch

	from sim.utils.sim_utils import traj2control, traj_transform_to_global
	from sim.utils.score_calculator import hugsim_evaluate

	# Deployment settings are read from the process environment; each one is
	# None when the corresponding variable is not set.
	ADMIN_TOKEN = os.environ.get('ADMIN_TOKEN')
	HF_TOKEN = os.environ.get('HF_TOKEN')
	COMPETITION_ID = os.environ.get('COMPETITION_ID')

	# Shared Hugging Face Hub client used by every helper below.
	hf_api = HfApi(token=HF_TOKEN)


	class SubmissionStatus(enum.Enum):
	    """Status codes written into a submission record's ``status`` field."""

	    PENDING = 0
	    QUEUED = 1
	    # Set when the first request for a submission reaches this server.
	    PROCESSING = 2
	    # Set once the last scene of a submission has been evaluated.
	    SUCCESS = 3
	    FAILED = 4


	def get_gpu_memory():
	    """Return the used memory of every GPU as reported by ``nvidia-smi``.

	    Runs ``nvidia-smi --query-gpu=memory.used --format=csv``, skips the CSV
	    header line and parses the leading integer of each remaining line
	    (presumably MiB -- the unit suffix is discarded, confirm against the
	    nvidia-smi output on the target host).

	    Returns:
	        List[int]: One value per GPU, in the order nvidia-smi lists them.
	    Raises:
	        RuntimeError: If nvidia-smi exits with a non-zero status, or cannot
	            be started at all (e.g. the binary is not installed).
	    """
	    command = "nvidia-smi --query-gpu=memory.used --format=csv".split()
	    try:
	        raw_output = sp.check_output(command, stderr=sp.STDOUT)
	    except sp.CalledProcessError as e:
	        raise RuntimeError("command '{}' return with error (code {}): {}".format(e.cmd, e.returncode, e.output)) from e
	    except OSError as e:
	        # Covers FileNotFoundError/PermissionError when the binary is missing
	        # or not executable, so callers only ever have to handle RuntimeError.
	        raise RuntimeError("command '{}' could not be executed: {}".format(" ".join(command), e)) from e

	    # First line is the CSV header; blank lines are ignored so the result does
	    # not depend on whether the output ends with a newline.
	    rows = raw_output.decode('ascii').splitlines()[1:]
	    return [int(row.split()[0]) for row in rows if row.strip()]


	def get_system_status():
	    """Collect a snapshot of CPU, RAM and GPU usage.

	    CPU load is sampled with ``psutil.cpu_percent(interval=1)``; memory
	    figures are converted from bytes to GiB and rounded to two decimals.

	    Returns:
	        dict: Keys ``cpu_percent``, ``cpu_count``, ``total_memory_gb``,
	        ``used_memory_gb``, ``memory_percent`` and ``gpus`` (the list
	        returned by ``get_gpu_memory``).
	    """
	    bytes_per_gib = 1024 ** 3

	    cpu_load = psutil.cpu_percent(interval=1)
	    logical_cpus = psutil.cpu_count(logical=True)
	    ram = psutil.virtual_memory()

	    return {
	        "cpu_percent": cpu_load,
	        "cpu_count": logical_cpus,
	        "total_memory_gb": round(ram.total / bytes_per_gib, 2),
	        "used_memory_gb": round(ram.used / bytes_per_gib, 2),
	        "memory_percent": ram.percent,
	        "gpus": get_gpu_memory(),
	    }


	def get_token_info(token: str) -> Dict[str, Any]:
	    """Fetch the JSON record stored for an access token.

	    The record is downloaded from ``token_data_info/{token}.json`` in the
	    competition dataset repository.

	    Args:
	        token (str): The access token supplied by the client.
	    Returns:
	        Dict[str, Any]: The decoded token record.
	    Raises:
	        ValueError: If the token is empty or contains a path separator.
	    """
	    # The token comes straight from an HTTP header/query parameter and is
	    # interpolated into a repository path, so refuse anything that could
	    # address a file outside token_data_info/.
	    if not token or "/" in token or "\\" in token:
	        raise ValueError("Invalid token.")

	    token_info_path = hf_api.hf_hub_download(
	        repo_id=COMPETITION_ID,
	        filename=f"token_data_info/{token}.json",
	        repo_type="dataset",
	    )

	    # JSON is UTF-8 by specification; do not depend on the locale default.
	    with open(token_info_path, 'r', encoding='utf-8') as f:
	        return json.load(f)


	def download_submission_info(team_id: str) -> Dict[str, Any]:
	    """
	    Fetch a team's submission record from the competition dataset repo.
	    Args:
	        team_id (str): The team ID; selects ``submission_info/{team_id}.json``.
	    Returns:
	        Dict[str, Any]: The decoded JSON document.
	    """
	    local_copy = hf_api.hf_hub_download(
	        repo_type="dataset",
	        repo_id=COMPETITION_ID,
	        filename=f"submission_info/{team_id}.json",
	    )
	    with open(local_copy, 'r') as handle:
	        return json.load(handle)


	def upload_submission_info(team_id: str, user_submission_info: Dict[str, Any]):
	    """
	    Write a team's submission record back to the competition dataset repo.
	    Args:
	        team_id (str): The team ID; selects ``submission_info/{team_id}.json``.
	        user_submission_info (Dict[str, Any]): The record to serialise as
	            4-space-indented JSON.
	    """
	    encoded = json.dumps(user_submission_info, indent=4).encode("utf-8")
	    hf_api.upload_file(
	        path_or_fileobj=io.BytesIO(encoded),
	        path_in_repo=f"submission_info/{team_id}.json",
	        repo_id=COMPETITION_ID,
	        repo_type="dataset",
	    )


	def update_submission_status(team_id: str, submission_id: str, status: int):
	    """
	    Set the status of one submission and upload the team's record again.
	    Only the first entry whose ``submission_id`` matches is modified; the
	    record is uploaded even when no entry matches.
	    Args:
	        team_id (str): The team ID owning the submission.
	        submission_id (str): The submission to update.
	        status (int): The new status value.
	    """
	    record = download_submission_info(team_id)
	    target = next(
	        (entry for entry in record["submissions"] if entry["submission_id"] == submission_id),
	        None,
	    )
	    if target is not None:
	        target["status"] = status
	    upload_submission_info(team_id, record)


	def delete_client_space(client_space_id: str):
	    """
	    Delete a client Space from the Hugging Face Hub, best effort.
	    A failure is reported on stdout and otherwise ignored.
	    Args:
	        client_space_id (str): Repository ID of the Space to delete.
	    """
	    try:
	        hf_api.delete_repo(
	            repo_id=client_space_id,
	            repo_type="space"
	        )
	    except Exception as exc:
	        # Deliberately best effort, but only for ordinary errors: a bare
	        # ``except`` would also swallow KeyboardInterrupt and SystemExit.
	        print(f"Failed to delete space {client_space_id}. It may not exist or already deleted. ({exc!r})")


	class FifoDict:
	    """Size-bounded mapping that evicts its oldest entry when full.

	    All access goes through ``self.locker`` so that readers never observe
	    the underlying ``OrderedDict`` while another thread is mutating it.
	    """

	    def __init__(self, max_size: int):
	        """
	        Args:
	            max_size (int): Maximum number of entries kept. A value of zero
	                or less disables storage: ``push`` then keeps nothing.
	        """
	        self.max_size = max_size
	        self._order_dict = OrderedDict()
	        self.locker = threading.Lock()

	    def push(self, key: str, value: Any):
	        """Store ``value`` under ``key``.

	        If ``key`` is already present its stored value is kept (the first
	        value wins) and the entry is only moved to the newest position.
	        Otherwise the oldest entries are evicted until there is room.
	        """
	        with self.locker:
	            if key in self._order_dict:
	                self._order_dict.move_to_end(key)
	                return
	            if self.max_size <= 0:
	                # Nothing can be stored; previously popitem() on the empty
	                # dict raised KeyError here.
	                return
	            while len(self._order_dict) >= self.max_size:
	                self._order_dict.popitem(last=False)
	            self._order_dict[key] = value

	    def get(self, key: str) -> Any:
	        """Return the value stored under ``key``, or None if absent."""
	        with self.locker:
	            return self._order_dict.get(key, None)


	@dataclass
	class SceneConfig:
	    """A named scene together with its merged configuration."""

	    # Scene name; also used as the per-scene output sub-directory.
	    name: str
	    # Configuration passed as ``cfg`` when the environment is created.
	    cfg: DictConfig


	@dataclass
	class EnvExecuteResult:
	    """Outcome flags returned after executing one action."""

	    # True when the step finished the scene that was active.
	    cur_scene_done: bool
	    # True when the last scene of the whole run has finished.
	    done: bool


	class EnvHandler:
	    """A class to handle the environment for HUGSim.
	    This can include multiple scene and configurations.

	    Scenes are played in list order. After each scene the recorded frames
	    are written to ``data.pkl``, evaluated, and the result is written to
	    ``eval.json`` in that scene's output directory.
	    """

	    # A scene is forced to end once more than this many steps were executed.
	    MAX_SCENE_STEPS = 400
	    # Number of most recent log messages kept in memory.
	    LOG_CAPACITY = 100

	    def __init__(self, scene_list: "List[SceneConfig]", base_output: str):
	        """
	        Args:
	            scene_list (List[SceneConfig]): Scenes to run, in order.
	            base_output (str): Directory under which one sub-directory per
	                scene is used for output.
	        """
	        self._lock = threading.Lock()
	        self.scene_list = scene_list
	        self.base_output = base_output
	        self.env = None
	        self.reset_env()

	    def _switch_scene(self, scene_index: int):
	        """
	        Switch to a different scene based on the index.
	        Args:
	            scene_index (int): The index of the scene to switch to.
	        Raises:
	            ValueError: If the index is outside ``scene_list``.
	        """
	        if scene_index < 0 or scene_index >= len(self.scene_list):
	            raise ValueError("Invalid scene index.")

	        self.cur_scene_index = scene_index
	        scene_config = self.scene_list[scene_index]
	        self.close()
	        # NOTE: the attribute name is a historical misspelling of "output";
	        # it is kept so existing readers of the attribute keep working.
	        self.cur_otuput = os.path.join(self.base_output, scene_config.name)
	        self.env = gymnasium.make('hugsim_env/HUGSim-v0', cfg=scene_config.cfg, output=self.cur_otuput)
	        self._scene_cnt = 0
	        self._scene_done = False
	        self._save_data = {'type': 'closeloop', 'frames': []}
	        self._obs, self._info = self.env.reset()

	        self._log(f"Switched to scene: {scene_config.name}")

	    def close(self):
	        """
	        Close the environment and release resources.
	        Does nothing when no environment is open.
	        """
	        if self.env is None:
	            return
	        env, self.env = self.env, None
	        try:
	            # Give the environment a chance to release what it holds instead
	            # of only dropping the reference.
	            env.close()
	        except Exception as exc:
	            # Closing is best effort: a failing close must not prevent the
	            # handler from switching scenes or shutting down.
	            self._log("Environment close failed:", repr(exc))
	        del env
	        self._log("Environment closed.")

	    def reset_env(self):
	        """
	        Reset the environment and initialize variables.
	        Starts a fresh log and switches back to the first scene.
	        """
	        # The log buffer must exist before _switch_scene() runs, because
	        # switching scenes already writes log messages.
	        with self._lock:
	            self._log_list = deque(maxlen=self.LOG_CAPACITY)
	        self._done = False
	        self._switch_scene(0)
	        self._log("Environment reset complete.")

	    def get_current_state(self):
	        """
	        Get the current state of the environment.
	        Returns:
	            dict: ``{"obs": ..., "info": ...}`` holding the values returned by
	            the most recent environment reset or step.
	        """
	        return {
	            "obs": self._obs,
	            "info": self._info,
	        }

	    @property
	    def has_done(self) -> bool:
	        """
	        Check if the episode is done.
	        Returns:
	            bool: True if the episode is done, False otherwise.
	        """
	        return self._done

	    @property
	    def has_scene_done(self) -> bool:
	        """
	        Check if the current scene is done.
	        Returns:
	            bool: True if the current scene is done, False otherwise.
	        """
	        return self._scene_done

	    @property
	    def log_list(self) -> deque:
	        """
	        Get the log list.
	        Returns:
	            deque: A snapshot copy of the recent log messages, so callers can
	            iterate it while other threads keep logging.
	        """
	        with self._lock:
	            return deque(self._log_list, maxlen=self._log_list.maxlen)

	    def execute_action(self, plan_traj: np.ndarray) -> "EnvExecuteResult":
	        """
	        Execute the action based on the planned trajectory.
	        Args:
	            plan_traj (np.ndarray): The planned trajectory to follow. It is
	                indexed as ``plan_traj[:, [1, 0]]``, so it needs at least two
	                columns.
	        Returns:
	            EnvExecuteResult: Whether the current scene and the whole run are
	            finished after this step.
	        """
	        acc, steer_rate = traj2control(plan_traj, self._info)
	        action = {'acc': acc, 'steer_rate': steer_rate}
	        self._log("Executing action:", action)

	        self._obs, _, terminated, truncated, self._info = self.env.step(action)
	        self._scene_cnt += 1
	        self._scene_done = terminated or truncated or self._scene_cnt > self.MAX_SCENE_STEPS

	        # Fancy indexing returns a copy, so the caller's array is not modified
	        # by the sign flip below.
	        imu_plan_traj = plan_traj[:, [1, 0]]
	        imu_plan_traj[:, 1] *= -1
	        global_traj = traj_transform_to_global(imu_plan_traj, self._info['ego_box'])
	        self._save_data['frames'].append({
	            'time_stamp': self._info['timestamp'],
	            'is_key_frame': True,
	            'ego_box': self._info['ego_box'],
	            'obj_boxes': self._info['obj_boxes'],
	            'obj_names': ['car' for _ in self._info['obj_boxes']],
	            'planned_traj': {
	                'traj': global_traj,
	                'timestep': 0.5
	            },
	            'collision': self._info['collision'],
	            'rc': self._info['rc']
	        })

	        if not self._scene_done:
	            return EnvExecuteResult(cur_scene_done=False, done=False)

	        # Scene finished: persist the recorded frames, then evaluate them.
	        with open(os.path.join(self.cur_otuput, 'data.pkl'), 'wb') as wf:
	            pickle.dump([self._save_data], wf)

	        ground_xyz = np.asarray(o3d.io.read_point_cloud(os.path.join(self.cur_otuput, 'ground.ply')).points)
	        scene_xyz = np.asarray(o3d.io.read_point_cloud(os.path.join(self.cur_otuput, 'scene.ply')).points)
	        results = hugsim_evaluate([self._save_data], ground_xyz, scene_xyz)
	        with open(os.path.join(self.cur_otuput, 'eval.json'), 'w') as f:
	            json.dump(results, f)

	        self._log(f"Scene {self.cur_scene_index} completed. Evaluation results saved.")

	        if self.cur_scene_index < len(self.scene_list) - 1:
	            self._switch_scene(self.cur_scene_index + 1)
	            return EnvExecuteResult(cur_scene_done=True, done=False)

	        self._done = True
	        return EnvExecuteResult(cur_scene_done=True, done=True)

	    def _log(self, *messages):
	        """Append one timestamped line, built from ``messages``, to the log."""
	        log_message = f"[{str(datetime.now())}]" + " ".join([str(msg) for msg in messages]) + "\n"
	        with self._lock:
	            self._log_list.append(log_message)


	class EnvHandlerManager:
	    """Registry of ``EnvHandler`` instances keyed by environment ID.

	    A closed environment leaves a ``None`` entry behind, so its ID is still
	    reported as existing and ``get_env_handler`` returns ``None`` for it
	    instead of building a new handler.
	    """

	    def __init__(self):
	        self._lock = threading.Lock()
	        self._env_handlers = {}

	    def _get_scene_list(self, env_id: str, base_output: str) -> List[SceneConfig]:
	        """
	        Build the scene configurations from the bundled YAML files.
	        Args:
	            env_id (str): The environment ID (not used when loading).
	            base_output (str): Value stored in ``cfg.base.output_dir``.
	        Returns:
	            List[SceneConfig]: A list holding the single configured scene.
	        """
	        config_dir = os.path.join(os.path.dirname(__file__), 'docker', "web_server_config")
	        # (top-level config key, file name), in the order they are merged.
	        sections = (
	            ("scenario", 'scene-0383-medium-00.yaml'),
	            ("base", 'nuscenes_base.yaml'),
	            ("camera", 'nuscenes_camera.yaml'),
	            ("kinematic", 'kinematic.yaml'),
	        )
	        cfg = OmegaConf.merge(*[
	            {section: OmegaConf.load(os.path.join(config_dir, file_name))}
	            for section, file_name in sections
	        ])

	        # Overlay the scene model's own settings on top of the merged config.
	        scene_model_dir = os.path.join(cfg.base.model_base, cfg.scenario.scene_name)
	        model_config = OmegaConf.load(os.path.join(scene_model_dir, 'cfg.yaml'))
	        model_config.update({"model_path": "/app/app_datas/PAMI2024/release/ss/scenes/nuscenes/scene-0383"})
	        cfg.update(model_config)
	        cfg.base.output_dir = base_output

	        return [SceneConfig(name=cfg.scenario.scene_name, cfg=cfg)]

	    def _generate_env_handler(self, env_id: str):
	        """Create the output directory for ``env_id`` and build its handler."""
	        output_root = "/app/app_datas/env_output"
	        scenes = self._get_scene_list(env_id, output_root)
	        handler_dir = os.path.join(output_root, f"{env_id}_hugsim_env")
	        os.makedirs(handler_dir, exist_ok=True)
	        return EnvHandler(scenes, base_output=handler_dir)

	    def exists_env_handler(self, env_id: str) -> bool:
	        """
	        Tell whether an entry is registered for the given environment ID.
	        Args:
	            env_id (str): The environment ID.
	        Returns:
	            bool: True if an entry exists (including one that was closed),
	            False otherwise.
	        """
	        with self._lock:
	            known = env_id in self._env_handlers
	        return known

	    def get_env_handler(self, env_id: str) -> EnvHandler:
	        """
	        Return the handler for the given environment ID, creating it on
	        first use.
	        Args:
	            env_id (str): The environment ID.
	        Returns:
	            EnvHandler: The handler instance, or None if it was closed.
	        """
	        with self._lock:
	            if env_id in self._env_handlers:
	                return self._env_handlers[env_id]
	            handler = self._generate_env_handler(env_id)
	            self._env_handlers[env_id] = handler
	            return handler

	    def close_env_handler(self, env_id: str):
	        """
	        Close the handler for the given environment ID and mark it closed.
	        Args:
	            env_id (str): The environment ID.
	        """
	        with self._lock:
	            handler = self._env_handlers.pop(env_id, None)
	            # Keep a None marker so the ID is remembered as closed.
	            self._env_handlers[env_id] = None
	            if handler is None:
	                return
	            handler.close()
	            torch.cuda.empty_cache()


	# The ASGI application; the endpoints below register themselves on it.
	app = FastAPI()

	# Cache of pickled /execute_action responses keyed by transaction ID, so a
	# repeated request with the same ID is answered without stepping again.
	_result_dict= FifoDict(max_size=100)
	# Process-wide registry of environment handlers, keyed by submission ID.
	env_manager = EnvHandlerManager()


	def _get_env_handler(
	    auth_token: Optional[str] = Header(None),
	    query_token: Optional[str] = Query(None)
	) -> EnvHandler:
	    """
	    FastAPI dependency resolving the caller's token to its EnvHandler.
	    The header token takes precedence over the query token. The first
	    request for a submission marks it as PROCESSING before the handler is
	    created.
	    Raises:
	        HTTPException: 401 if no token is given or it cannot be resolved,
	            404 if the submission's environment was already closed.
	    """
	    token = auth_token if auth_token else query_token
	    if not token:
	        raise HTTPException(status_code=401, detail="Authorization token is required.")

	    try:
	        token_info = get_token_info(token)
	    except Exception:
	        raise HTTPException(status_code=401)

	    submission_id = token_info["submission_id"]
	    first_contact = not env_manager.exists_env_handler(submission_id)
	    if first_contact:
	        update_submission_status(token_info["team_id"], submission_id, SubmissionStatus.PROCESSING.value)

	    handler = env_manager.get_env_handler(submission_id)
	    if handler is not None:
	        return handler
	    raise HTTPException(status_code=404, detail="Environment handler already closed.")


	def _load_numpy_ndarray_json_str(json_str: str) -> np.ndarray:
	"""
	Load a numpy ndarray from a JSON string.
	"""
	data = json.loads(json_str)
	return np.array(data["data"], dtype=data["dtype"]).reshape(data["shape"])


	@app.post("/reset")
	def reset_endpoint(env_handler: EnvHandler = Depends(_get_env_handler)):
	    """
	    Restart the caller's environment from its first scene.
	    Returns:
	        dict: ``{"success": True}`` once the reset has finished.
	    """
	    env_handler.reset_env()
	    outcome = {"success": True}
	    return outcome


	@app.get("/get_current_state")
	def get_current_state_endpoint(env_handler: EnvHandler = Depends(_get_env_handler)):
	    """
	    Return the caller's current environment state without stepping it.
	    Returns:
	        Response: A pickled dict with ``done``, ``cur_scene_done`` and
	        ``state``, sent as ``application/octet-stream``.
	    """
	    current = env_handler.get_current_state()
	    body = pickle.dumps({
	        "done": env_handler.has_done,
	        "cur_scene_done": env_handler.has_scene_done,
	        "state": current,
	    })
	    return Response(content=body, media_type="application/octet-stream")


	def _cache_and_respond(transaction_id: str, payload: Dict[str, Any]) -> Response:
	    """Pickle ``payload``, cache it under ``transaction_id`` and return it as a binary response."""
	    body = pickle.dumps(payload)
	    _result_dict.push(transaction_id, body)
	    return Response(content=body, media_type="application/octet-stream")


	@app.post("/execute_action")
	def execute_action_endpoint(
	    plan_traj: str = Body(..., embed=True),
	    transaction_id: str = Body(..., embed=True),
	    auth_token: str = Header(...),
	    env_handler: EnvHandler = Depends(_get_env_handler)
	):
	    """
	    Execute the action based on the planned trajectory.
	    Args:
	        plan_traj (str): The planned trajectory in JSON format.
	        transaction_id (str): The unique transaction ID for caching results.
	        auth_token (str): The caller's token, used to look up the submission
	            once the run is finished.
	        env_handler (EnvHandler): The environment handler instance.
	    Returns:
	        Response: A pickled dict with ``done``, ``cur_scene_done`` and
	        ``state``.
	    Raises:
	        HTTPException: 400 if ``plan_traj`` cannot be decoded into an array
	            with at least two dimensions and two columns.
	    """
	    # A repeated transaction ID is answered from the cache so that a client
	    # retry does not step the environment a second time.
	    cached = _result_dict.get(transaction_id)
	    if cached is not None:
	        return Response(content=cached, media_type="application/octet-stream")

	    if env_handler.has_done:
	        return _cache_and_respond(
	            transaction_id,
	            {"done": True, "cur_scene_done": True, "state": env_handler.get_current_state()},
	        )

	    # Reject malformed client input up front instead of failing with an
	    # unhandled error, possibly after the environment has already stepped.
	    try:
	        trajectory = _load_numpy_ndarray_json_str(plan_traj)
	    except (ValueError, KeyError, TypeError) as exc:
	        raise HTTPException(status_code=400, detail=f"Invalid plan_traj payload: {exc}") from exc
	    if trajectory.ndim < 2 or trajectory.shape[1] < 2:
	        raise HTTPException(status_code=400, detail="plan_traj must have at least two dimensions and two columns.")

	    execute_result = env_handler.execute_action(trajectory)
	    if execute_result.done:
	        # Last scene finished: release the environment, remove the client
	        # space, record the outcome and publish the evaluation output.
	        # NOTE(review): the status is set to SUCCESS before the upload, so a
	        # failing upload leaves a SUCCESS record without results -- confirm
	        # whether this order is intended.
	        token_info = get_token_info(auth_token)
	        env_manager.close_env_handler(token_info["submission_id"])
	        delete_client_space(token_info["client_space_id"])
	        update_submission_status(token_info["team_id"], token_info["submission_id"], SubmissionStatus.SUCCESS.value)
	        hf_api.upload_folder(
	            repo_id=COMPETITION_ID,
	            folder_path=env_handler.base_output,
	            repo_type="dataset",
	            path_in_repo=f"eval_results/{token_info['submission_id']}",
	        )

	    return _cache_and_respond(
	        transaction_id,
	        {
	            "done": execute_result.done,
	            "cur_scene_done": execute_result.cur_scene_done,
	            "state": env_handler.get_current_state(),
	        },
	    )


	@app.get("/submition_info")
	def main_page_endpoint(env_handler: EnvHandler = Depends(_get_env_handler)):
	    """
	    Endpoint to display the submission logs.
	    The page reloads itself every five seconds.
	    Returns:
	        HTMLResponse: The caller's recent log messages inside a ``<pre>``.
	    """
	    # NOTE(review): the "/" handler further down reuses this function name;
	    # both routes stay registered because the decorator runs first.
	    import html  # local import keeps this block self-contained

	    # Log lines contain arbitrary text; escape them so they are shown
	    # literally instead of being interpreted as markup.
	    log_str = html.escape("\n".join(env_handler.log_list))
	    html_content = f"""
	<html><body><pre>{log_str}</pre></body></html>
	<script>
	setTimeout(function() {{
	window.location.reload();
	}}, 5000);
	</script>
	"""
	    return HTMLResponse(content=html_content)


	@app.get("/")
	def main_page_endpoint(
	    admin_token: Optional[str] = Query(None),
	):
	    """
	    Main page endpoint.
	    Shows the system status to callers presenting the admin token and a
	    plain "running" page to everybody else.
	    Args:
	        admin_token (Optional[str]): Token from the query string.
	    Returns:
	        HTMLResponse: The status page or the "running" placeholder.
	    """
	    import secrets  # local import keeps this block self-contained

	    # Access requires a configured ADMIN_TOKEN. Comparing the raw values
	    # would treat "no token configured" and "no token sent" as a match
	    # (None == None) and expose the status page to everyone.
	    authorized = (
	        bool(ADMIN_TOKEN)
	        and admin_token is not None
	        and secrets.compare_digest(admin_token.encode("utf-8"), ADMIN_TOKEN.encode("utf-8"))
	    )
	    if not authorized:
	        html_content = """
	<html>
	running
	</html>
	"""
	        return HTMLResponse(content=html_content)

	    system_info = get_system_status()
	    html_content = f"""
	<html>
	<head>
	<title>System Status</title>
	</head>
	<body>
	<h1>System Status</h1>
	<pre>{json.dumps(system_info, indent=4)}</pre>
	</body>
	</html>
	"""
	    return HTMLResponse(content=html_content)


	# Start the server on all interfaces, port 7860, with a single worker.
	# NOTE(review): this runs at import time as well, since there is no
	# ``if __name__ == "__main__":`` guard -- confirm that is intended.
	uvicorn.run(app, host="0.0.0.0", port=7860, workers=1)