hugsim_web_server_0 / web_server.py
Libra-1995's picture
fix: lock
a125da9
raw
history blame
18.6 kB
import sys
import os
import pickle
import json
import threading
import io
import enum
import hugsim_env
import subprocess as sp
from collections import deque, OrderedDict
from datetime import datetime
from typing import Any, Dict, Optional, List
from dataclasses import dataclass
sys.path.append(os.getcwd())
from fastapi import FastAPI, Body, Header, Depends, HTTPException, Query
from fastapi.responses import HTMLResponse, Response
from omegaconf import OmegaConf, DictConfig
from huggingface_hub import HfApi
import open3d as o3d
import numpy as np
import gymnasium
import uvicorn
import psutil
import torch
from sim.utils.sim_utils import traj2control, traj_transform_to_global
from sim.utils.score_calculator import hugsim_evaluate
ADMIN_TOKEN = os.getenv('ADMIN_TOKEN', None)
HF_TOKEN = os.getenv('HF_TOKEN', None)
COMPETITION_ID = os.getenv('COMPETITION_ID', None)
hf_api = HfApi(token=HF_TOKEN)
class SubmissionStatus(enum.Enum):
PENDING = 0
QUEUED = 1
PROCESSING = 2
SUCCESS = 3
FAILED = 4
def get_gpu_memory():
output_to_list = lambda x: x.decode('ascii').split('\n')[:-1]
COMMAND = "nvidia-smi --query-gpu=memory.used --format=csv"
try:
memory_use_info = output_to_list(sp.check_output(COMMAND.split(),stderr=sp.STDOUT))[1:]
except sp.CalledProcessError as e:
raise RuntimeError("command '{}' return with error (code {}): {}".format(e.cmd, e.returncode, e.output))
memory_use_values = [int(x.split()[0]) for x in memory_use_info]
return memory_use_values
def get_system_status():
cpu_percent = psutil.cpu_percent(interval=1)
cpu_count = psutil.cpu_count(logical=True)
virtual_mem = psutil.virtual_memory()
total_mem = virtual_mem.total / (1024 ** 3)
used_mem = virtual_mem.used / (1024 ** 3)
mem_percent = virtual_mem.percent
system_info = {
"cpu_percent": cpu_percent,
"cpu_count": cpu_count,
"total_memory_gb": round(total_mem, 2),
"used_memory_gb": round(used_mem, 2),
"memory_percent": mem_percent,
"gpus": get_gpu_memory(),
}
return system_info
def get_token_info(token: str) -> Dict[str, Any]:
token_info_path = hf_api.hf_hub_download(
repo_id=COMPETITION_ID,
filename=f"token_data_info/{token}.json",
repo_type="dataset",
)
with open(token_info_path, 'r') as f:
token_info = json.load(f)
return token_info
def download_submission_info(team_id: str) -> Dict[str, Any]:
"""
Download the submission info from Hugging Face Hub.
Args:
team_id (str): The team ID.
Returns:
Dict[str, Any]: The submission info.
"""
submission_info_path = hf_api.hf_hub_download(
repo_id=COMPETITION_ID,
filename=f"submission_info/{team_id}.json",
repo_type="dataset",
)
with open(submission_info_path, 'r') as f:
submission_info = json.load(f)
return submission_info
def upload_submission_info(team_id: str, user_submission_info: Dict[str, Any]):
user_submission_info_json = json.dumps(user_submission_info, indent=4)
user_submission_info_json_bytes = user_submission_info_json.encode("utf-8")
user_submission_info_json_buffer = io.BytesIO(user_submission_info_json_bytes)
hf_api.upload_file(
path_or_fileobj=user_submission_info_json_buffer,
path_in_repo=f"submission_info/{team_id}.json",
repo_id=COMPETITION_ID,
repo_type="dataset",
)
def update_submission_status(team_id: str, submission_id: str, status: int):
user_submission_info = download_submission_info(team_id)
for submission in user_submission_info["submissions"]:
if submission["submission_id"] == submission_id:
submission["status"] = status
break
upload_submission_info(team_id, user_submission_info)
def delete_client_space(client_space_id: str):
try:
hf_api.delete_repo(
repo_id=client_space_id,
repo_type="space"
)
except:
print(f"Failed to delete space {client_space_id}. It may not exist or already deleted.")
class FifoDict:
def __init__(self, max_size: int):
self.max_size = max_size
self._order_dict = OrderedDict()
self.locker = threading.Lock()
def push(self, key: str, value: Any):
with self.locker:
if key in self._order_dict:
self._order_dict.move_to_end(key)
return
if len(self._order_dict) >= self.max_size:
self._order_dict.popitem(last=False)
self._order_dict[key] = value
def get(self, key: str) -> Any:
return self._order_dict.get(key, None)
@dataclass
class SceneConfig:
name: str
cfg: DictConfig
@dataclass
class EnvExecuteResult:
cur_scene_done: bool
done: bool
class EnvHandler:
"""A class to handle the environment for HUGSim.
This can include multiple scene and configurations.
"""
def __init__(self, scene_list: List[SceneConfig], base_output: str):
self._lock = threading.Lock()
self.scene_list = scene_list
self.base_output = base_output
self.env = None
self.reset_env()
def _switch_scene(self, scene_index: int):
"""
Switch to a different scene based on the index.
Args:
scene_index (int): The index of the scene to switch to.
"""
if scene_index < 0 or scene_index >= len(self.scene_list):
raise ValueError("Invalid scene index.")
self.cur_scene_index = scene_index
scene_config = self.scene_list[scene_index]
self.close()
self.cur_otuput = os.path.join(self.base_output, scene_config.name)
self.env = gymnasium.make('hugsim_env/HUGSim-v0', cfg=scene_config.cfg, output=self.cur_otuput)
self._scene_cnt = 0
self._scene_done = False
self._save_data = {'type': 'closeloop', 'frames': []}
self._obs, self._info = self.env.reset()
self._log(f"Switched to scene: {scene_config.name}")
def close(self):
"""
Close the environment and release resources.
"""
if self.env is not None:
del self.env
self.env = None
self._log("Environment closed.")
def reset_env(self):
"""
Reset the environment and initialize variables.
"""
self._done = False
self._switch_scene(0)
self._log_list = deque(maxlen=100)
self._log("Environment reset complete.")
def get_current_state(self):
"""
Get the current state of the environment.
"""
return {
"obs": self._obs,
"info": self._info,
}
@property
def has_done(self) -> bool:
"""
Check if the episode is done.
Returns:
bool: True if the episode is done, False otherwise.
"""
return self._done
@property
def has_scene_done(self) -> bool:
"""
Check if the current scene is done.
Returns:
bool: True if the current scene is done, False otherwise.
"""
return self._scene_done
@property
def log_list(self) -> deque:
"""
Get the log list.
Returns:
deque: The log list containing recent log messages.
"""
return self._log_list
def execute_action(self, plan_traj: np.ndarray) -> EnvExecuteResult:
"""
Execute the action based on the planned trajectory.
Args:
plan_traj (Any): The planned trajectory to follow.
Returns:
bool: True if the episode is done, False otherwise.
"""
acc, steer_rate = traj2control(plan_traj, self._info)
action = {'acc': acc, 'steer_rate': steer_rate}
self._log("Executing action:", action)
self._obs, _, terminated, truncated, self._info = self.env.step(action)
self._scene_cnt += 1
self._scene_done = terminated or truncated or self._scene_cnt > 400
imu_plan_traj = plan_traj[:, [1, 0]]
imu_plan_traj[:, 1] *= -1
global_traj = traj_transform_to_global(imu_plan_traj, self._info['ego_box'])
self._save_data['frames'].append({
'time_stamp': self._info['timestamp'],
'is_key_frame': True,
'ego_box': self._info['ego_box'],
'obj_boxes': self._info['obj_boxes'],
'obj_names': ['car' for _ in self._info['obj_boxes']],
'planned_traj': {
'traj': global_traj,
'timestep': 0.5
},
'collision': self._info['collision'],
'rc': self._info['rc']
})
if not self._scene_done:
return EnvExecuteResult(cur_scene_done=False, done=False)
with open(os.path.join(self.cur_otuput, 'data.pkl'), 'wb') as wf:
pickle.dump([self._save_data], wf)
ground_xyz = np.asarray(o3d.io.read_point_cloud(os.path.join(self.cur_otuput, 'ground.ply')).points)
scene_xyz = np.asarray(o3d.io.read_point_cloud(os.path.join(self.cur_otuput, 'scene.ply')).points)
results = hugsim_evaluate([self._save_data], ground_xyz, scene_xyz)
with open(os.path.join(self.cur_otuput, 'eval.json'), 'w') as f:
json.dump(results, f)
self._log(f"Scene {self.cur_scene_index} completed. Evaluation results saved.")
if self.cur_scene_index < len(self.scene_list) - 1:
self._switch_scene(self.cur_scene_index + 1)
return EnvExecuteResult(cur_scene_done=True, done=False)
self._done = True
return EnvExecuteResult(cur_scene_done=True, done=True)
def _log(self, *messages):
log_message = f"[{str(datetime.now())}]" + " ".join([str(msg) for msg in messages]) + "\n"
with self._lock:
self._log_list.append(log_message)
class EnvHandlerManager:
def __init__(self):
self._env_handlers = {}
self._lock = threading.Lock()
def _get_scene_list(self, env_id: str, base_output: str) -> List[SceneConfig]:
"""
Load the scene configurations from the YAML files.
Returns:
List[SceneConfig]: A list of scene configurations.
"""
base_path = os.path.join(os.path.dirname(__file__), 'docker', "web_server_config", 'nuscenes_base.yaml')
scenario_path = os.path.join(os.path.dirname(__file__), 'docker', "web_server_config", 'scene-0383-medium-00.yaml')
camera_path = os.path.join(os.path.dirname(__file__), 'docker', "web_server_config", 'nuscenes_camera.yaml')
kinematic_path = os.path.join(os.path.dirname(__file__), 'docker', "web_server_config", 'kinematic.yaml')
scenario_config = OmegaConf.load(scenario_path)
base_config = OmegaConf.load(base_path)
camera_config = OmegaConf.load(camera_path)
kinematic_config = OmegaConf.load(kinematic_path)
cfg = OmegaConf.merge(
{"scenario": scenario_config},
{"base": base_config},
{"camera": camera_config},
{"kinematic": kinematic_config}
)
model_path = os.path.join(cfg.base.model_base, cfg.scenario.scene_name)
model_config = OmegaConf.load(os.path.join(model_path, 'cfg.yaml'))
model_config.update({"model_path": "/app/app_datas/PAMI2024/release/ss/scenes/nuscenes/scene-0383"})
cfg.update(model_config)
cfg.base.output_dir = base_output
return [
SceneConfig(name=cfg.scenario.scene_name, cfg=cfg)
]
def _generate_env_handler(self, env_id: str):
base_output = "/app/app_datas/env_output"
scene_list = self._get_scene_list(env_id, base_output)
output = os.path.join(base_output, f"{env_id}_hugsim_env")
os.makedirs(output, exist_ok=True)
return EnvHandler(scene_list, base_output=output)
def exists_env_handler(self, env_id: str) -> bool:
"""
Check if the environment handler for the given environment ID exists.
Args:
env_id (str): The environment ID.
Returns:
bool: True if the environment handler exists, False otherwise.
"""
with self._lock:
return env_id in self._env_handlers
def get_env_handler(self, env_id: str) -> EnvHandler:
"""
Get the environment handler for the given environment ID.
Args:
env_id (str): The environment ID.
Returns:
EnvHandler: The environment handler instance.
"""
with self._lock:
if env_id not in self._env_handlers:
self._env_handlers[env_id] = self._generate_env_handler(env_id)
return self._env_handlers[env_id]
def close_env_handler(self, env_id: str):
"""
Close the environment handler for the given environment ID.
Args:
env_id (str): The environment ID.
"""
with self._lock:
env = self._env_handlers.pop(env_id, None)
self._env_handlers[env_id] = None
if env is not None:
env.close()
torch.cuda.empty_cache()
app = FastAPI()
_result_dict= FifoDict(max_size=100)
env_manager = EnvHandlerManager()
def _get_env_handler(
auth_token: Optional[str] = Header(None),
query_token: Optional[str] = Query(None)
) -> EnvHandler:
token = auth_token or query_token
if not token:
raise HTTPException(status_code=401, detail="Authorization token is required.")
try:
token_info = get_token_info(token)
except Exception:
raise HTTPException(status_code=401)
submission_id = token_info["submission_id"]
if not env_manager.exists_env_handler(submission_id):
update_submission_status(token_info["team_id"], submission_id, SubmissionStatus.PROCESSING.value)
env_handler = env_manager.get_env_handler(submission_id)
if env_handler is None:
raise HTTPException(status_code=404, detail="Environment handler already closed.")
return env_handler
def _load_numpy_ndarray_json_str(json_str: str) -> np.ndarray:
"""
Load a numpy ndarray from a JSON string.
"""
data = json.loads(json_str)
return np.array(data["data"], dtype=data["dtype"]).reshape(data["shape"])
@app.post("/reset")
def reset_endpoint(env_handler: EnvHandler = Depends(_get_env_handler)):
"""
Reset the environment.
"""
env_handler.reset_env()
return {"success": True}
@app.get("/get_current_state")
def get_current_state_endpoint(env_handler: EnvHandler = Depends(_get_env_handler)):
"""
Get the current state of the environment.
"""
state = env_handler.get_current_state()
data = {
"done": env_handler.has_done,
"cur_scene_done": env_handler.has_scene_done,
"state": state,
}
return Response(content=pickle.dumps(data), media_type="application/octet-stream")
@app.post("/execute_action")
def execute_action_endpoint(
plan_traj: str = Body(..., embed=True),
transaction_id: str = Body(..., embed=True),
auth_token: str = Header(...),
env_handler: EnvHandler = Depends(_get_env_handler)
):
"""
Execute the action based on the planned trajectory.
Args:
plan_traj (str): The planned trajectory in JSON format.
transaction_id (str): The unique transaction ID for caching results.
env_handler (EnvHandler): The environment handler instance.
Returns:
Response: The response containing the execution result.
"""
cache_result = _result_dict.get(transaction_id)
if cache_result is not None:
return Response(content=cache_result, media_type="application/octet-stream")
if env_handler.has_done:
result = pickle.dumps({"done": True, "cur_scene_done": True, "state": env_handler.get_current_state()})
_result_dict.push(transaction_id, result)
return Response(content=result, media_type="application/octet-stream")
plan_traj = _load_numpy_ndarray_json_str(plan_traj)
execute_result = env_handler.execute_action(plan_traj)
if execute_result.done:
token_info = get_token_info(auth_token)
env_manager.close_env_handler(token_info["submission_id"])
delete_client_space(token_info["client_space_id"])
update_submission_status(token_info["team_id"], token_info["submission_id"], SubmissionStatus.SUCCESS.value)
hf_api.upload_folder(
repo_id=COMPETITION_ID,
folder_path=env_handler.base_output,
repo_type="dataset",
path_in_repo=f"eval_results/{token_info['submission_id']}",
)
result = pickle.dumps({"done": execute_result.done, "cur_scene_done": execute_result.cur_scene_done, "state": env_handler.get_current_state()})
_result_dict.push(transaction_id, result)
return Response(content=result, media_type="application/octet-stream")
state = env_handler.get_current_state()
result = pickle.dumps({"done": execute_result.done, "cur_scene_done": execute_result.cur_scene_done, "state": state})
_result_dict.push(transaction_id, result)
return Response(content=result, media_type="application/octet-stream")
@app.get("/submition_info")
def main_page_endpoint(env_handler: EnvHandler = Depends(_get_env_handler)):
"""
Endpoint to display the submission logs.
"""
log_str = "\n".join(env_handler.log_list)
html_content = f"""
<html><body><pre>{log_str}</pre></body></html>
<script>
setTimeout(function() {{
window.location.reload();
}}, 5000);
</script>
"""
return HTMLResponse(content=html_content)
@app.get("/")
def main_page_endpoint(
admin_token: Optional[str] = Query(None),
):
"""
Main page endpoint to display logs.
"""
if admin_token != ADMIN_TOKEN:
html_content = f"""
<html>
running
</html>
"""
return HTMLResponse(content=html_content)
system_info = get_system_status()
html_content = f"""
<html>
<head>
<title>System Status</title>
</head>
<body>
<h1>System Status</h1>
<pre>{json.dumps(system_info, indent=4)}</pre>
</body>
</html>
"""
return HTMLResponse(content=html_content)
uvicorn.run(app, host="0.0.0.0", port=7860, workers=1)