hugsim_web_server_0 / web_server.py
hyzhou404's picture
extend time limitation
7bdfd37
import sys
import os
import pickle
import json
import threading
import io
import enum
import hugsim_env
import subprocess as sp
import shutil
import time
from collections import deque, OrderedDict
from datetime import datetime, timezone
from typing import Any, Dict, Optional, List, Tuple
from dataclasses import dataclass
sys.path.append(os.getcwd())
from moviepy import ImageSequenceClip
from fastapi import FastAPI, Body, Header, Depends, HTTPException, Query
from fastapi.responses import HTMLResponse, Response
from omegaconf import OmegaConf, DictConfig
from huggingface_hub import HfApi
import open3d as o3d
import numpy as np
import gymnasium
import uvicorn
import psutil
import torch
from glob import glob
from sim.utils.sim_utils import traj2control, traj_transform_to_global
from sim.utils.score_calculator import hugsim_evaluate
# Secret compared against the `admin_token` query parameter of the "/" endpoint.
ADMIN_TOKEN = os.getenv('ADMIN_TOKEN', None)
# Hugging Face access token handed to the HfApi client below.
HF_TOKEN = os.getenv('HF_TOKEN', None)
# Dataset repo id that stores token info, submission info and evaluation results.
COMPETITION_ID = os.getenv('COMPETITION_ID', None)
# Shared Hugging Face Hub client used by the hub helpers in this module.
hf_api = HfApi(token=HF_TOKEN)
class SubmissionStatus(enum.Enum):
    """Lifecycle states of a submission.

    The integer ``value`` of a member is what gets written into the
    submission info record.
    """

    PENDING = 0
    QUEUED = 1
    PROCESSING = 2
    SUCCESS = 3
    FAILED = 4
def to_video(observations: List[Any], output_path: str):
    """Tile the six camera images of every observation and write them as a video.

    For each observation the three front cameras are concatenated along
    axis 1, the three back cameras likewise, and the two rows are then
    concatenated along axis 0 (presumably images are height x width x channel,
    giving a 2x3 mosaic -- confirm against the environment). The resulting
    frames are written to ``output_path`` at 4 frames per second.

    Args:
        observations: Per-step mappings from camera name to image array.
        output_path: Destination path of the video file.
    """
    first_row_cams = ('CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT')
    second_row_cams = ('CAM_BACK_RIGHT', 'CAM_BACK', 'CAM_BACK_LEFT')

    def _mosaic(obs):
        # Build one frame out of the six camera images of a single step.
        first_row = np.concatenate([obs[cam] for cam in first_row_cams], axis=1)
        second_row = np.concatenate([obs[cam] for cam in second_row_cams], axis=1)
        return np.concatenate([first_row, second_row], axis=0)

    frames = [_mosaic(obs) for obs in observations]
    ImageSequenceClip(frames, fps=4).write_videofile(output_path)
def get_gpu_memory():
    """Return the used memory of every GPU as reported by ``nvidia-smi``.

    Runs ``nvidia-smi --query-gpu=memory.used --format=csv``, skips the CSV
    header line and parses the leading integer of every remaining row.

    Returns:
        List[int]: One value per reported GPU, in the order printed by
        ``nvidia-smi``.

    Raises:
        RuntimeError: If ``nvidia-smi`` exits with a non-zero status, or if it
            cannot be started at all (for example when it is not installed).
    """
    command = ["nvidia-smi", "--query-gpu=memory.used", "--format=csv"]
    try:
        raw_output = sp.check_output(command, stderr=sp.STDOUT)
    except sp.CalledProcessError as e:
        raise RuntimeError(
            "command '{}' return with error (code {}): {}".format(e.cmd, e.returncode, e.output)
        ) from e
    except OSError as e:
        # A missing or non-executable binary surfaces as OSError, not
        # CalledProcessError; report it through the same exception type.
        raise RuntimeError(
            "command '{}' could not be executed: {}".format(" ".join(command), e)
        ) from e

    # splitlines() copes with output that lacks a trailing newline; the first
    # line is the CSV header and blank lines carry no value.
    rows = raw_output.decode('ascii').splitlines()[1:]
    return [int(row.split()[0]) for row in rows if row.strip()]
def get_system_status():
    """Collect a snapshot of CPU, memory and GPU usage.

    Returns:
        dict: CPU utilisation percentage (sampled over one second), logical
        CPU count, total/used memory in GiB rounded to two decimals, memory
        utilisation percentage, and the list returned by ``get_gpu_memory``.
    """
    bytes_per_gib = 1024 ** 3
    cpu_load = psutil.cpu_percent(interval=1)
    logical_cpus = psutil.cpu_count(logical=True)
    memory = psutil.virtual_memory()
    return {
        "cpu_percent": cpu_load,
        "cpu_count": logical_cpus,
        "total_memory_gb": round(memory.total / bytes_per_gib, 2),
        "used_memory_gb": round(memory.used / bytes_per_gib, 2),
        "memory_percent": memory.percent,
        "gpus": get_gpu_memory(),
    }
def get_token_info(token: str) -> Dict[str, Any]:
    """Download and parse the JSON record stored for ``token``.

    The record is read from ``token_data_info/<token>.json`` in the
    competition dataset repo.

    Args:
        token (str): The token whose record is requested.
    Returns:
        Dict[str, Any]: The parsed JSON content.
    """
    local_path = hf_api.hf_hub_download(
        repo_id=COMPETITION_ID,
        repo_type="dataset",
        filename=f"token_data_info/{token}.json",
    )
    with open(local_path, 'r') as fp:
        return json.load(fp)
def download_submission_info(team_id: str) -> Dict[str, Any]:
    """
    Fetch a team's submission info from the competition dataset repo.
    The file read is ``submission_info/<team_id>.json``.
    Args:
        team_id (str): The team ID.
    Returns:
        Dict[str, Any]: The parsed submission info.
    """
    local_path = hf_api.hf_hub_download(
        repo_id=COMPETITION_ID,
        repo_type="dataset",
        filename=f"submission_info/{team_id}.json",
    )
    with open(local_path, 'r') as fp:
        return json.load(fp)
def upload_submission_info(team_id: str, user_submission_info: Dict[str, Any]):
    """
    Serialize a team's submission info and upload it to the competition dataset repo.
    The target file is ``submission_info/<team_id>.json``.
    Args:
        team_id (str): The team ID.
        user_submission_info (Dict[str, Any]): The submission info to store.
    """
    # Indented JSON, UTF-8 encoded, uploaded from an in-memory buffer.
    payload = json.dumps(user_submission_info, indent=4).encode("utf-8")
    hf_api.upload_file(
        path_or_fileobj=io.BytesIO(payload),
        path_in_repo=f"submission_info/{team_id}.json",
        repo_id=COMPETITION_ID,
        repo_type="dataset",
    )
def update_submission_data(team_id: str, submission_id: str, data: Dict[str, Any]):
    """
    Merge ``data`` into one submission entry of a team and upload the result.
    Only the first entry whose ``submission_id`` matches is updated; the
    submission info is uploaded again even when no entry matches.
    Args:
        team_id (str): The team ID.
        submission_id (str): The submission to update.
        data (Dict[str, Any]): Fields to merge into the submission entry.
    """
    info = download_submission_info(team_id)
    target = next(
        (entry for entry in info["submissions"] if entry["submission_id"] == submission_id),
        None,
    )
    if target is not None:
        target.update(data)
    upload_submission_info(team_id, info)
def delete_client_space(client_space_id: str):
    """
    Delete a client Space from the Hugging Face Hub on a best-effort basis.
    Failures are reported on stdout and never raised to the caller.
    Args:
        client_space_id (str): Repo id of the Space to delete.
    """
    try:
        hf_api.delete_repo(
            repo_id=client_space_id,
            repo_type="space"
        )
    except Exception as exc:
        # `Exception` instead of a bare `except` so KeyboardInterrupt and
        # SystemExit still propagate; the cause is printed for diagnosis.
        print(f"Failed to delete space {client_space_id}. It may not exist or already deleted. ({exc})")
class FifoDict:
    """Bounded key/value store that evicts its oldest entry when full.

    Pushing a key that is already present keeps the stored value and only
    moves the key to the newest position. All accesses go through ``locker``.
    """

    def __init__(self, max_size: int):
        """
        Args:
            max_size (int): Maximum number of entries kept. With a value of
                zero or less nothing is ever stored.
        """
        self.max_size = max_size
        self._order_dict = OrderedDict()
        self.locker = threading.Lock()

    def push(self, key: str, value: Any):
        """
        Store ``value`` under ``key``, evicting the oldest entry if needed.
        Args:
            key (str): The entry key.
            value (Any): The value to store; ignored if ``key`` already exists.
        """
        with self.locker:
            if key in self._order_dict:
                self._order_dict.move_to_end(key)
                return
            if self.max_size <= 0:
                # Zero capacity: evicting from the empty dict would raise KeyError.
                return
            if len(self._order_dict) >= self.max_size:
                self._order_dict.popitem(last=False)
            self._order_dict[key] = value

    def get(self, key: str) -> Any:
        """
        Return the value stored under ``key``, or None when it is absent.
        Args:
            key (str): The entry key.
        """
        # Read under the same lock that guards writers in `push`.
        with self.locker:
            return self._order_dict.get(key, None)
@dataclass
class SceneConfig:
    """A scene name paired with the configuration used to build its environment."""

    # Scene name; combined with the scenario mode to name the scene's output directory.
    name: str
    # Merged configuration handed to the environment as ``cfg``.
    cfg: DictConfig
@dataclass
class EnvExecuteResult:
    """Outcome flags returned after executing one action."""

    # True when the action finished the scene that was active.
    cur_scene_done: bool
    # True when the action finished the last scene of the run.
    done: bool
class EnvHandler:
    """A class to handle the environment for HUGSim.

    The handler owns one gymnasium environment at a time and plays the scenes
    of ``scene_list`` in order. When a scene finishes, its recorded frames,
    evaluation results and video are written to the scene's output directory
    and the next scene is loaded. The most recent log messages are kept in a
    bounded deque guarded by a lock.
    """

    def __init__(self, scene_list: "List[SceneConfig]", base_output: str, max_scene_steps: int = 400):
        """
        Args:
            scene_list (List[SceneConfig]): Scenes to play, in order.
            base_output (str): Directory under which per-scene output directories are created.
            max_scene_steps (int): A scene is ended once more than this many actions
                have been executed in it.
        """
        self._created_time = datetime.now(timezone.utc)
        self._last_active_time = datetime.now(timezone.utc)
        self._lock = threading.Lock()
        self.scene_list = scene_list
        self.base_output = base_output
        self.max_scene_steps = max_scene_steps
        self.env = None
        self.reset_env()

    def _switch_scene(self, scene_index: int):
        """
        Switch to a different scene based on the index.
        Closes the current environment, creates the one for the requested
        scene, clears the per-scene buffers and resets the new environment.
        Args:
            scene_index (int): The index of the scene to switch to.
        Raises:
            ValueError: If ``scene_index`` is outside ``scene_list``.
        """
        if scene_index < 0 or scene_index >= len(self.scene_list):
            raise ValueError("Invalid scene index.")
        self.close()
        self.cur_scene_index = scene_index
        scene_config = self.scene_list[scene_index]
        scene_label = f"{scene_config.name}_{scene_config.cfg.scenario.mode}"
        self._log(f"Switch to scene: {scene_label}")
        print(f"Switch to scene: {scene_label}")
        self.cur_output = os.path.join(self.base_output, scene_label)
        os.makedirs(self.cur_output, exist_ok=True)
        self.env = gymnasium.make('hugsim_env/HUGSim-v0', cfg=scene_config.cfg, output=self.cur_output)
        self._scene_cnt = 0
        self._scene_done = False
        self._save_data = {'type': 'closeloop', 'frames': []}
        self._observations_save = []
        self._obs, self._info = self.env.reset()
        self._log(f"Switched to scene: {scene_config.name}")

    def close(self):
        """
        Close the environment and release resources.
        Does nothing when no environment is open.
        """
        if self.env is None:
            return
        env, self.env = self.env, None
        try:
            # Let the environment run its own cleanup before the reference is dropped.
            env.close()
        except Exception as exc:
            # Closing stays best-effort: a failing cleanup must not block a
            # scene switch or handler shutdown.
            self._log(f"Error while closing environment: {exc}")
        del env
        self._log("Environment closed.")

    def reset_env(self):
        """
        Reset the environment and initialize variables.
        Clears the log and the collected scores and restarts from the first scene.
        """
        self._last_active_time = datetime.now(timezone.utc)
        with self._lock:
            self._log_list = deque(maxlen=100)
        self._done = False
        self._score_list = []
        self._switch_scene(0)
        self._log("Environment reset complete.")

    def get_current_state(self):
        """
        Get the current state of the environment.
        Also refreshes the last-active timestamp.
        Returns:
            dict: The latest observation under "obs" and the latest info under "info".
        """
        self._last_active_time = datetime.now(timezone.utc)
        return {
            "obs": self._obs,
            "info": self._info,
        }

    @property
    def created_time(self) -> datetime:
        """
        Get the creation time of the environment handler.
        Returns:
            datetime: The creation time.
        """
        return self._created_time

    @property
    def last_active_time(self) -> datetime:
        """
        Get the last active time of the environment handler.
        Returns:
            datetime: The last active time.
        """
        return self._last_active_time

    @property
    def has_done(self) -> bool:
        """
        Check if the episode is done.
        Returns:
            bool: True if the episode is done, False otherwise.
        """
        return self._done

    @property
    def has_scene_done(self) -> bool:
        """
        Check if the current scene is done.
        Returns:
            bool: True if the current scene is done, False otherwise.
        """
        return self._scene_done

    @property
    def log_list(self) -> deque:
        """
        Get the log list.
        Returns:
            deque: A snapshot of the recent log messages. It is a copy taken
            under the lock, so it can be iterated while other threads log.
        """
        with self._lock:
            return deque(self._log_list, maxlen=self._log_list.maxlen)

    def execute_action(self, plan_traj: np.ndarray) -> "EnvExecuteResult":
        """
        Execute the action based on the planned trajectory.
        The trajectory is converted to a control command and stepped through
        the environment, and the resulting frame is recorded. When the scene
        ends, its results are saved and the next scene (if any) is loaded.
        Args:
            plan_traj (np.ndarray): The planned trajectory to follow.
        Returns:
            EnvExecuteResult: Whether the current scene and the whole run are finished.
        Raises:
            RuntimeError: If the environment has already been closed.
        """
        self._last_active_time = datetime.now(timezone.utc)
        if self.env is None:
            raise RuntimeError("Environment is closed. Cannot execute action.")

        acc, steer_rate = traj2control(plan_traj, self._info)
        action = {'acc': acc, 'steer_rate': steer_rate}
        self._log("Executing action:", action)

        self._obs, _, terminated, truncated, self._info = self.env.step(action)
        self._scene_cnt += 1
        self._scene_done = terminated or truncated or self._scene_cnt > self.max_scene_steps

        self._record_frame(plan_traj)
        if not self._scene_done:
            return EnvExecuteResult(cur_scene_done=False, done=False)

        self._finalize_scene()
        if self.cur_scene_index < len(self.scene_list) - 1:
            self._switch_scene(self.cur_scene_index + 1)
            return EnvExecuteResult(cur_scene_done=True, done=False)

        self._done = True
        return EnvExecuteResult(cur_scene_done=True, done=True)

    def _record_frame(self, plan_traj: np.ndarray):
        """Append the frame produced by the latest step to the per-scene buffers."""
        # Index-array selection returns a copy, so the caller's array is untouched.
        # The two columns are swapped and the second one is negated.
        imu_plan_traj = plan_traj[:, [1, 0]]
        imu_plan_traj[:, 1] *= -1
        global_traj = traj_transform_to_global(imu_plan_traj, self._info['ego_box'])
        self._save_data['frames'].append({
            'time_stamp': self._info['timestamp'],
            'is_key_frame': True,
            'ego_box': self._info['ego_box'],
            'obj_boxes': self._info['obj_boxes'],
            'obj_names': ['car' for _ in self._info['obj_boxes']],
            'planned_traj': {
                'traj': global_traj,
                'timestep': 0.5
            },
            'collision': self._info['collision'],
            'rc': self._info['rc']
        })
        self._observations_save.append(self._obs['rgb'])

    def _finalize_scene(self):
        """Write the finished scene's data, evaluation results and video, and keep its scores."""
        with open(os.path.join(self.cur_output, 'data.pkl'), 'wb') as wf:
            pickle.dump([self._save_data], wf)
        ground_xyz = np.asarray(o3d.io.read_point_cloud(os.path.join(self.cur_output, 'ground.ply')).points)
        scene_xyz = np.asarray(o3d.io.read_point_cloud(os.path.join(self.cur_output, 'scene.ply')).points)
        results = hugsim_evaluate([self._save_data], ground_xyz, scene_xyz)
        with open(os.path.join(self.cur_output, 'eval.json'), 'w') as f:
            json.dump(results, f)
        self._score_list.append(results.copy())
        to_video(self._observations_save, os.path.join(self.cur_output, 'video.mp4'))
        self._log(f"Scene {self.cur_scene_index} completed. Evaluation results saved.")

    def _log(self, *messages):
        """Append a timestamped line, built from ``messages`` joined by spaces, to the log."""
        log_message = f"[{str(datetime.now())}]" + " ".join([str(msg) for msg in messages]) + "\n"
        with self._lock:
            self._log_list.append(log_message)

    def calculate_score(self) -> Dict[str, Any]:
        """
        Calculate the score based on the current environment state.
        Returns:
            Dict[str, Any]: The mean "rc" and mean "hdscore" over all finished
            scenes, each rounded to four decimals and returned as a plain float.
        Raises:
            ValueError: If the run is not finished yet.
        """
        if not self._done:
            raise ValueError("Environment is not done yet. Cannot calculate score.")
        rc = float(np.mean([float(score['rc']) for score in self._score_list]).round(4))
        hdscore = float(np.mean([float(score['hdscore']) for score in self._score_list]).round(4))
        return {"rc": rc, "hdscore": hdscore}
class EnvHandlerManager:
    """Registry of environment handlers keyed by environment ID.

    Handlers are created on first request and closed either explicitly or by
    a background daemon thread once they become too old or stay idle too
    long. A closed handler leaves a ``None`` entry behind, so later lookups
    for the same ID return ``None`` instead of creating a new handler.
    """

    # A handler expires once this many seconds have passed since its creation ...
    MAX_LIFETIME_SECONDS = 3600 * 3.0
    # ... or since it was last active.
    MAX_IDLE_SECONDS = 600
    # Pause between two expiry sweeps of the background thread.
    CLEANUP_INTERVAL_SECONDS = 15

    def __init__(self):
        self._env_handlers = {}
        self._token_info_map = {}
        self._lock = threading.Lock()
        threading.Thread(target=self._clean_expired_env_handlers, daemon=True).start()

    def _get_scene_list(self, base_output: str) -> "List[SceneConfig]":
        """
        Load the scene configurations from the YAML files.
        Args:
            base_output (str): Value stored as ``base.output_dir`` in every scene config.
        Returns:
            List[SceneConfig]: A list of scene configurations.
        """
        config_dir = os.path.join(os.path.dirname(__file__), "web_server_config")
        # The kinematic config is the same for every dataset, so load it once.
        kinematic_config = OmegaConf.load(os.path.join(config_dir, 'kinematic.yaml'))
        scene_list = []
        for data_type in ['kitti360', 'waymo', 'nuscenes', 'pandaset']:
            base_config = OmegaConf.load(os.path.join(config_dir, f'{data_type}_base.yaml'))
            camera_config = OmegaConf.load(os.path.join(config_dir, f'{data_type}_camera.yaml'))
            # glob returns files in arbitrary order; sort for a reproducible scene order.
            scenarios_list = sorted(glob(f"/app/app_datas/ss/scenarios/{data_type}/*.yaml"))
            for scenario_path in scenarios_list:
                scenario_config = OmegaConf.load(scenario_path)
                cfg = OmegaConf.merge(
                    {"scenario": scenario_config},
                    {"base": base_config},
                    {"camera": camera_config},
                    {"kinematic": kinematic_config}
                )
                model_path = os.path.join(cfg.base.model_base, cfg.scenario.scene_name)
                model_config = OmegaConf.load(os.path.join(model_path, 'cfg.yaml'))
                model_config.update({"model_path": f"/app/app_datas/ss/scenes/{data_type}/{cfg.scenario.scene_name}"})
                cfg.update(model_config)
                cfg.base.output_dir = base_output
                scene_list.append(SceneConfig(name=cfg.scenario.scene_name, cfg=cfg))
        return scene_list

    def _generate_env_handler(self, env_id: str):
        """
        Build a new environment handler writing to its own output directory.
        Args:
            env_id (str): The environment ID; used to name the output directory.
        Returns:
            EnvHandler: The newly created handler.
        """
        base_output = "/app/app_datas/env_output"
        scene_list = self._get_scene_list(base_output)
        output = os.path.join(base_output, f"{env_id}_hugsim_env")
        os.makedirs(output, exist_ok=True)
        return EnvHandler(scene_list, base_output=output)

    def exists_env_handler(self, env_id: str) -> bool:
        """
        Check if the environment handler for the given environment ID exists.
        An ID whose handler has been closed still counts as existing.
        Args:
            env_id (str): The environment ID.
        Returns:
            bool: True if the environment handler exists, False otherwise.
        """
        with self._lock:
            return env_id in self._env_handlers

    def get_env_handler(self, env_id: str, token_info: Dict[str, Any]) -> "Optional[EnvHandler]":
        """
        Get the environment handler for the given environment ID.
        A handler is created (and ``token_info`` remembered) on the first
        request for an ID. Creation happens while the lock is held.
        Args:
            env_id (str): The environment ID.
            token_info (Dict[str, Any]): Token record associated with the environment.
        Returns:
            Optional[EnvHandler]: The environment handler instance, or None if
            the handler for this ID has already been closed.
        """
        with self._lock:
            if env_id not in self._env_handlers:
                self._env_handlers[env_id] = self._generate_env_handler(env_id)
                self._token_info_map[env_id] = token_info
            return self._env_handlers[env_id]

    def close_env_handler(self, env_id: str):
        """
        Close the environment handler for the given environment ID.
        The ID stays registered with a None entry to mark it as closed.
        Args:
            env_id (str): The environment ID.
        """
        with self._lock:
            env = self._env_handlers.pop(env_id, None)
            self._env_handlers[env_id] = None
        if env is not None:
            env.close()
        torch.cuda.empty_cache()

    def _expired_env_ids(self, current_time: datetime) -> List[str]:
        """Return the IDs of open handlers that exceeded the lifetime or idle limit."""
        with self._lock:
            return [
                env_id
                for env_id, handler in self._env_handlers.items()
                if handler is not None and (
                    (current_time - handler.created_time).total_seconds() > self.MAX_LIFETIME_SECONDS
                    or (current_time - handler.last_active_time).total_seconds() > self.MAX_IDLE_SECONDS
                )
            ]

    def _expire_env_handler(self, env_id: str):
        """Close one expired handler, mark its submission as failed and delete its client space."""
        self.close_env_handler(env_id)
        with self._lock:
            token_info = self._token_info_map.pop(env_id, None)
        if not token_info:
            return
        try:
            update_submission_data(token_info["team_id"], token_info["submission_id"], {"status": SubmissionStatus.FAILED.value, "error_message": "SPACE_TIMEOUT"})
        except Exception as e:
            # A failed status update must not prevent the space from being deleted.
            print(f"Error in marking submission for environment {env_id} as failed: {e}")
        delete_client_space(token_info["client_space_id"])

    def _clean_expired_env_handlers(self):
        """
        Clean up expired environment handlers based on the last active time.
        Runs forever; intended as the target of the daemon thread started in
        ``__init__``.
        """
        while True:
            try:
                # The IDs are collected under the lock, but handlers are closed
                # outside of it because close_env_handler acquires the same
                # non-reentrant lock.
                for env_id in self._expired_env_ids(datetime.now(timezone.utc)):
                    try:
                        self._expire_env_handler(env_id)
                    except Exception as e:
                        # Keep going so one failing handler does not block the others.
                        print(f"Error in cleaning expired environment handlers: {e}")
            except Exception as e:
                print(f"Error in cleaning expired environment handlers: {e}")
            time.sleep(self.CLEANUP_INTERVAL_SECONDS)
# FastAPI application on which the endpoints below are registered.
app = FastAPI()
# Bounded cache of pickled /execute_action responses keyed by transaction ID,
# so a repeated request with the same ID gets the stored bytes back.
_result_dict= FifoDict(max_size=100)
# Single manager for all environment handlers; constructing it starts the expiry thread.
env_manager = EnvHandlerManager()
def _get_env_handler(
    auth_token: Optional[str] = Header(None),
    query_token: Optional[str] = Query(None)
) -> EnvHandler:
    """
    FastAPI dependency resolving the caller's token to an environment handler.
    The token is taken from the ``auth_token`` header, falling back to the
    ``query_token`` query parameter. The first request for a submission marks
    it as PROCESSING before its handler is created.
    Raises:
        HTTPException: 401 if no token is given or its record cannot be
            loaded; 404 if the handler of the submission was already closed.
    """
    token = auth_token if auth_token else query_token
    if not token:
        raise HTTPException(status_code=401, detail="Authorization token is required.")

    try:
        token_info = get_token_info(token)
    except Exception:
        raise HTTPException(status_code=401)

    team_id = token_info["team_id"]
    submission_id = token_info["submission_id"]
    is_known = env_manager.exists_env_handler(submission_id)
    if not is_known:
        update_submission_data(team_id, submission_id, {"status": SubmissionStatus.PROCESSING.value})

    handler = env_manager.get_env_handler(submission_id, token_info)
    if handler is None:
        raise HTTPException(status_code=404, detail="Environment handler already closed.")
    return handler
def _load_numpy_ndarray_json_str(json_str: str) -> np.ndarray:
"""
Load a numpy ndarray from a JSON string.
"""
data = json.loads(json_str)
return np.array(data["data"], dtype=data["dtype"]).reshape(data["shape"])
@app.post("/reset")
def reset_endpoint(env_handler: EnvHandler = Depends(_get_env_handler)):
    """
    Restart the caller's environment from its first scene.
    Returns:
        dict: ``{"success": True}`` once the reset has completed.
    """
    env_handler.reset_env()
    response = {"success": True}
    return response
@app.get("/get_current_state")
def get_current_state_endpoint(env_handler: EnvHandler = Depends(_get_env_handler)):
    """
    Return the caller's current environment state as a pickled dictionary.
    The dictionary carries the run-level "done" flag, the scene-level
    "cur_scene_done" flag and the current "state".
    """
    snapshot = env_handler.get_current_state()
    payload = pickle.dumps({
        "done": env_handler.has_done,
        "cur_scene_done": env_handler.has_scene_done,
        "state": snapshot,
    })
    return Response(content=payload, media_type="application/octet-stream")
@app.post("/execute_action")
def execute_action_endpoint(
    plan_traj: str = Body(..., embed=True),
    transaction_id: str = Body(..., embed=True),
    auth_token: str = Header(...),
    env_handler: EnvHandler = Depends(_get_env_handler)
):
    """
    Step the caller's environment with a planned trajectory.
    A response already stored for ``transaction_id`` is returned as-is. When
    the action finishes the last scene, the handler is closed, the client
    space is deleted, the final score is recorded and the output folder is
    uploaded and then removed locally.
    Args:
        plan_traj (str): The planned trajectory in JSON format.
        transaction_id (str): The unique transaction ID for caching results.
        auth_token (str): The caller's token, used to look up the submission on completion.
        env_handler (EnvHandler): The environment handler instance.
    Returns:
        Response: The pickled execution result ("done", "cur_scene_done", "state").
    """
    octet_stream = "application/octet-stream"

    cached = _result_dict.get(transaction_id)
    if cached is not None:
        return Response(content=cached, media_type=octet_stream)

    if env_handler.has_done:
        # Nothing left to execute: report the final state.
        payload = {"done": True, "cur_scene_done": True, "state": env_handler.get_current_state()}
    else:
        trajectory = _load_numpy_ndarray_json_str(plan_traj)
        outcome = env_handler.execute_action(trajectory)
        if outcome.done:
            # The run is complete: tear down, publish the score and the outputs.
            token_info = get_token_info(auth_token)
            submission_id = token_info["submission_id"]
            env_manager.close_env_handler(submission_id)
            delete_client_space(token_info["client_space_id"])
            final_score = env_handler.calculate_score()
            update_submission_data(token_info["team_id"], submission_id, {"status": SubmissionStatus.SUCCESS.value, "score": final_score})
            hf_api.upload_folder(
                repo_id=COMPETITION_ID,
                folder_path=env_handler.base_output,
                repo_type="dataset",
                path_in_repo=f"eval_results/{token_info['submission_id']}",
            )
            shutil.rmtree(env_handler.base_output, ignore_errors=True)
        payload = {"done": outcome.done, "cur_scene_done": outcome.cur_scene_done, "state": env_handler.get_current_state()}

    result = pickle.dumps(payload)
    _result_dict.push(transaction_id, result)
    return Response(content=result, media_type=octet_stream)
@app.get("/submition_info")
def main_page_endpoint(env_handler: EnvHandler = Depends(_get_env_handler)):
    """
    Endpoint to display the submission logs.
    The recent log messages of the caller's environment are rendered inside
    a ``<pre>`` element; the page reloads itself every five seconds.
    Returns:
        HTMLResponse: The rendered log page.
    """
    # Function-scope import keeps this block self-contained.
    import html

    # Escape the log text so characters such as "<" or "&" inside a message
    # are shown literally instead of being interpreted as markup.
    log_str = html.escape("\n".join(env_handler.log_list), quote=False)
    html_content = f"""
<html><body><pre>{log_str}</pre></body></html>
<script>
setTimeout(function() {{
window.location.reload();
}}, 5000);
</script>
"""
    return HTMLResponse(content=html_content)
@app.get("/")
def main_page_endpoint(
    admin_token: Optional[str] = Query(None),
):
    """
    Main page endpoint.
    Callers presenting the configured admin token get the system status
    page; everybody else gets a minimal "running" page.
    Args:
        admin_token (Optional[str]): Token supplied as a query parameter.
    Returns:
        HTMLResponse: The status page or the "running" page.
    """
    # Function-scope import keeps this block self-contained.
    import secrets

    # Admin access requires a configured, non-empty ADMIN_TOKEN and a supplied
    # token that matches it. A plain `!=` would treat "no token configured"
    # plus "no token supplied" (None vs. None) as a match. The comparison is
    # done on bytes in constant time.
    is_admin = (
        bool(ADMIN_TOKEN)
        and admin_token is not None
        and secrets.compare_digest(admin_token.encode("utf-8"), ADMIN_TOKEN.encode("utf-8"))
    )
    if not is_admin:
        html_content = """
<html>
running
</html>
"""
        return HTMLResponse(content=html_content)

    system_info = get_system_status()
    html_content = f"""
<html>
<head>
<title>System Status</title>
</head>
<body>
<h1>System Status</h1>
<pre>{json.dumps(system_info, indent=4)}</pre>
</body>
</html>
"""
    return HTMLResponse(content=html_content)
# Serve the app on all interfaces, port 7860, with a single worker.
# NOTE(review): this runs at module level without a `__main__` guard, so
# importing this module also starts the server -- confirm that is intended.
uvicorn.run(app, host="0.0.0.0", port=7860, workers=1)