Spaces:

blanchon
/

RobotHub-InferenceServer

Sleeping

App Files Files Community

RobotHub-InferenceServer / src /inference_server /main.py

blanchon

Update

63ed3a7 about 2 months ago

raw

history blame

11.4 kB

	import logging
	import os
	from contextlib import asynccontextmanager

	from fastapi import FastAPI, HTTPException
	from fastapi.middleware.cors import CORSMiddleware
	from pydantic import BaseModel

	from inference_server.models import list_supported_policies
	from inference_server.session_manager import SessionManager

	# Logging setup: INFO level, each record stamped with time, logger name and level.
	logging.basicConfig(
	    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
	    level=logging.INFO,
	)
	logger = logging.getLogger(__name__)

	# Module-wide SessionManager instance used by the endpoints in this file.
	session_manager = SessionManager()


	@asynccontextmanager
	async def lifespan(app: FastAPI):
	    """Handle app startup and shutdown.

	    Logs a startup message, yields while the application is running, then
	    awaits ``session_manager.cleanup_all_sessions()`` and logs completion.

	    Args:
	        app: The FastAPI application this lifespan is attached to (unused).

	    Yields:
	        None. No state is handed to the application.
	    """
	    logger.info("🚀 Inference Server starting up...")
	    try:
	        yield
	    finally:
	        # An exception raised while the app is running is re-raised at the
	        # yield point; the finally block guarantees cleanup still runs.
	        logger.info("🔄 Inference Server shutting down...")
	        await session_manager.cleanup_all_sessions()
	        logger.info("✅ Inference Server shutdown complete")


	# FastAPI application object; startup/shutdown handling is delegated to `lifespan`.
	app = FastAPI(
	    lifespan=lifespan,
	    title="Inference Server",
	    description="Multi-Policy Model Inference Server for Real-time Robot Control",
	    version="1.0.0",
	)

	# CORS middleware: currently accepts any origin, method and header.
	# NOTE(review): a wildcard origin together with allow_credentials=True is
	# discouraged by the FastAPI CORS documentation — confirm before production.
	app.add_middleware(
	    CORSMiddleware,
	    allow_credentials=True,
	    allow_headers=["*"],
	    allow_methods=["*"],
	    allow_origins=["*"],  # In production, specify actual origins
	)


	# Request/Response models
	class CreateSessionRequest(BaseModel):
	    """Request body accepted by ``POST /sessions``."""

	    session_id: str
	    policy_path: str
	    camera_names: list[str] = ["front"]  # Support multiple cameras
	    arena_server_url: str = "http://localhost:8000"
	    workspace_id: str | None = None  # Optional workspace ID
	    policy_type: str = "act"  # Policy type: act, pi0, pi0fast, smolvla, diffusion
	    language_instruction: str | None = None  # For vision-language policies


	class CreateSessionResponse(BaseModel):
	    """Response model returned by ``POST /sessions``."""

	    workspace_id: str
	    # Mapping of the form {camera_name: room_id}
	    camera_room_ids: dict[str, str]
	    joint_input_room_id: str
	    joint_output_room_id: str


	class SessionStatusResponse(BaseModel):
	    """Status payload used by the session listing and session status endpoints."""

	    session_id: str
	    status: str
	    policy_path: str
	    policy_type: str
	    camera_names: list[str]  # Multiple camera names
	    workspace_id: str
	    rooms: dict
	    stats: dict
	    inference_stats: dict | None = None
	    error_message: str | None = None


	# Health check
	@app.get("/", tags=["Health"])
	async def root():
	    """Minimal health check: report that the server is running."""
	    payload = {"message": "Inference Server is running", "status": "healthy"}
	    return payload


	@app.get("/health", tags=["Health"])
	async def health_check():
	    """Detailed health check: status plus the count and IDs of held sessions."""
	    current_sessions = session_manager.sessions
	    session_count = len(current_sessions)
	    return {
	        "status": "healthy",
	        "active_sessions": session_count,
	        "session_ids": list(current_sessions.keys()),
	    }


	@app.get("/policies", tags=["Policies"])
	async def list_policies():
	    """Return the policy types reported by ``list_supported_policies()``."""
	    supported = list_supported_policies()
	    return {
	        "supported_policies": supported,
	        "description": "Available policy types for inference",
	    }


	# Session management endpoints
	@app.post("/sessions", response_model=CreateSessionResponse, tags=["Sessions"])
	async def create_session(request: CreateSessionRequest):
	    """
	    Create a new inference session.

	    If workspace_id is provided, all rooms will be created in that workspace.
	    If workspace_id is not provided, a new workspace will be generated automatically.
	    All rooms for a session (cameras + joints) are always created in the same workspace.

	    Args:
	        request: Session parameters; every field is forwarded to
	            ``session_manager.create_session``.

	    Returns:
	        A ``CreateSessionResponse`` built from the mapping returned by the
	        session manager.

	    Raises:
	        HTTPException: 400 when a ``ValueError`` is raised while creating the
	            session or building the response, 500 for any other exception.
	    """
	    try:
	        room_ids = await session_manager.create_session(
	            session_id=request.session_id,
	            policy_path=request.policy_path,
	            camera_names=request.camera_names,
	            arena_server_url=request.arena_server_url,
	            workspace_id=request.workspace_id,
	            policy_type=request.policy_type,
	            language_instruction=request.language_instruction,
	        )
	        return CreateSessionResponse(**room_ids)
	    except ValueError as e:
	        # Chain the cause so the original error stays visible in tracebacks.
	        raise HTTPException(status_code=400, detail=str(e)) from e
	    except Exception as e:
	        logger.exception(f"Failed to create session {request.session_id}")
	        raise HTTPException(
	            status_code=500, detail=f"Failed to create session: {e!s}"
	        ) from e


	@app.get("/sessions", response_model=list[SessionStatusResponse], tags=["Sessions"])
	async def list_sessions():
	    """Return one ``SessionStatusResponse`` per entry from the session manager."""
	    session_infos = await session_manager.list_sessions()
	    responses = [SessionStatusResponse(**info) for info in session_infos]
	    return responses


	@app.get(
	    "/sessions/{session_id}", response_model=SessionStatusResponse, tags=["Sessions"]
	)
	async def get_session_status(session_id: str):
	    """Get status of a specific session.

	    Args:
	        session_id: Identifier of the session to look up.

	    Returns:
	        A ``SessionStatusResponse`` built from the session manager's status mapping.

	    Raises:
	        HTTPException: 404 when a ``KeyError`` is raised during the lookup.
	    """
	    try:
	        status = await session_manager.get_session_status(session_id)
	        return SessionStatusResponse(**status)
	    except KeyError as exc:
	        # Chain the cause so the original KeyError stays visible in tracebacks.
	        raise HTTPException(
	            status_code=404, detail=f"Session {session_id} not found"
	        ) from exc


	@app.post("/sessions/{session_id}/start", tags=["Control"])
	async def start_inference(session_id: str):
	    """Start inference for a session.

	    Args:
	        session_id: Identifier of the session to start.

	    Returns:
	        A dict with a confirmation ``message``.

	    Raises:
	        HTTPException: 404 when the session manager raises ``KeyError``,
	            500 for any other exception.
	    """
	    try:
	        await session_manager.start_inference(session_id)
	        return {"message": f"Inference started for session {session_id}"}
	    except KeyError as exc:
	        # Chain the cause so the original error stays visible in tracebacks.
	        raise HTTPException(
	            status_code=404, detail=f"Session {session_id} not found"
	        ) from exc
	    except Exception as e:
	        logger.exception(f"Failed to start inference for session {session_id}")
	        raise HTTPException(
	            status_code=500, detail=f"Failed to start inference: {e!s}"
	        ) from e


	@app.post("/sessions/{session_id}/stop", tags=["Control"])
	async def stop_inference(session_id: str):
	    """Stop inference for a session.

	    Args:
	        session_id: Identifier of the session to stop.

	    Returns:
	        A dict with a confirmation ``message``.

	    Raises:
	        HTTPException: 404 when the session manager raises ``KeyError``.
	    """
	    try:
	        await session_manager.stop_inference(session_id)
	        return {"message": f"Inference stopped for session {session_id}"}
	    except KeyError as exc:
	        # Chain the cause so the original KeyError stays visible in tracebacks.
	        raise HTTPException(
	            status_code=404, detail=f"Session {session_id} not found"
	        ) from exc


	@app.post("/sessions/{session_id}/restart", tags=["Control"])
	async def restart_inference(session_id: str):
	    """Restart inference for a session.

	    Args:
	        session_id: Identifier of the session to restart.

	    Returns:
	        A dict with a confirmation ``message``.

	    Raises:
	        HTTPException: 404 when the session manager raises ``KeyError``,
	            500 for any other exception.
	    """
	    try:
	        await session_manager.restart_inference(session_id)
	        return {"message": f"Inference restarted for session {session_id}"}
	    except KeyError as exc:
	        # Chain the cause so the original error stays visible in tracebacks.
	        raise HTTPException(
	            status_code=404, detail=f"Session {session_id} not found"
	        ) from exc
	    except Exception as e:
	        logger.exception(f"Failed to restart inference for session {session_id}")
	        raise HTTPException(
	            status_code=500, detail=f"Failed to restart inference: {e!s}"
	        ) from e


	@app.delete("/sessions/{session_id}", tags=["Sessions"])
	async def delete_session(session_id: str):
	    """Delete a session.

	    Args:
	        session_id: Identifier of the session to delete.

	    Returns:
	        A dict with a confirmation ``message``.

	    Raises:
	        HTTPException: 404 when the session manager raises ``KeyError``.
	    """
	    try:
	        await session_manager.delete_session(session_id)
	        return {"message": f"Session {session_id} deleted"}
	    except KeyError as exc:
	        # Chain the cause so the original KeyError stays visible in tracebacks.
	        raise HTTPException(
	            status_code=404, detail=f"Session {session_id} not found"
	        ) from exc


	# Debug endpoints for enhanced monitoring
	@app.get("/debug/system", tags=["Debug"])
	async def get_system_info():
	    """Get system information for debugging.

	    Returns:
	        A dict with ``cpu_percent``, ``memory`` (total/available/percent) and
	        ``disk`` (total/used/percent for ``/``) entries, plus a ``gpu`` entry
	        when ``torch.cuda.is_available()`` is true. If collecting the data
	        raises, a dict with a single ``error`` message is returned instead.
	    """
	    import asyncio

	    import psutil
	    import torch

	    try:
	        # cpu_percent(interval=1) blocks for the whole interval; run it in a
	        # worker thread so the event loop keeps serving other requests.
	        cpu_percent = await asyncio.to_thread(psutil.cpu_percent, interval=1)

	        # Sample each source once so the reported fields belong to the same snapshot.
	        memory = psutil.virtual_memory()
	        disk = psutil.disk_usage("/")

	        # System info
	        system_info = {
	            "cpu_percent": cpu_percent,
	            "memory": {
	                "total": memory.total,
	                "available": memory.available,
	                "percent": memory.percent,
	            },
	            "disk": {
	                "total": disk.total,
	                "used": disk.used,
	                "percent": disk.percent,
	            },
	        }

	        # GPU info if available
	        if torch.cuda.is_available():
	            system_info["gpu"] = {
	                "device_count": torch.cuda.device_count(),
	                "current_device": torch.cuda.current_device(),
	                "device_name": torch.cuda.get_device_name(),
	                "memory_allocated": torch.cuda.memory_allocated(),
	                "memory_cached": torch.cuda.memory_reserved(),
	            }

	        return system_info
	    except Exception as e:
	        # Best-effort endpoint: keep returning an error payload, but record
	        # the traceback instead of swallowing it silently.
	        logger.exception("Failed to get system info")
	        return {"error": f"Failed to get system info: {e}"}


	@app.get("/debug/logs", tags=["Debug"])
	async def get_recent_logs():
	    """Placeholder log endpoint: returns a static note and the session count."""
	    try:
	        # Simple stand-in; a production version might read from real log files.
	        session_count = len(session_manager.sessions)
	        response = {
	            "message": "Log endpoint available",
	            "note": "Implement actual log reading if needed",
	            "active_sessions": session_count,
	        }
	        return response
	    except Exception as exc:
	        return {"error": f"Failed to get logs: {exc}"}


	@app.post("/debug/sessions/{session_id}/reset", tags=["Debug"])
	async def debug_reset_session(session_id: str):
	    """Reset a session's internal state for debugging.

	    Resets the inference engine (when one is set), clears the action queue
	    and sets the per-camera ``images_updated`` flags and ``joints_updated``
	    to ``False``.

	    Args:
	        session_id: Identifier of the session to reset.

	    Returns:
	        A dict with a confirmation ``message``.

	    Raises:
	        HTTPException: 404 when ``session_id`` is not in
	            ``session_manager.sessions``, 500 when any reset step raises.
	    """
	    # Check existence outside the try block: HTTPException is itself an
	    # Exception, so raising the 404 inside it would be turned into a 500.
	    if session_id not in session_manager.sessions:
	        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

	    try:
	        session = session_manager.sessions[session_id]

	        # Reset inference engine if available
	        if session.inference_engine:
	            session.inference_engine.reset()

	        # Clear action queue
	        session.action_queue.clear()

	        # Reset flags
	        for camera_name in session.camera_names:
	            session.images_updated[camera_name] = False
	        session.joints_updated = False

	        return {"message": f"Session {session_id} state reset successfully"}

	    except Exception as e:
	        logger.exception(f"Failed to reset session {session_id}")
	        raise HTTPException(
	            status_code=500, detail=f"Failed to reset session: {e}"
	        ) from e


	@app.get("/debug/sessions/{session_id}/queue", tags=["Debug"])
	async def get_session_queue_info(session_id: str):
	    """Get detailed information about a session's action queue.

	    Args:
	        session_id: Identifier of the session to inspect.

	    Returns:
	        A dict with the queue length and maxlen, the session's step and
	        frequency settings, ``last_queue_cleanup`` and a ``data_status``
	        sub-dict describing which joint/image data is present and updated.

	    Raises:
	        HTTPException: 404 when ``session_id`` is not in
	            ``session_manager.sessions``, 500 when reading the session raises.
	    """
	    # Check existence outside the try block: HTTPException is itself an
	    # Exception, so raising the 404 inside it would be turned into a 500.
	    if session_id not in session_manager.sessions:
	        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

	    try:
	        session = session_manager.sessions[session_id]

	        return {
	            "session_id": session_id,
	            "queue_length": len(session.action_queue),
	            "queue_maxlen": session.action_queue.maxlen,
	            "n_action_steps": session.n_action_steps,
	            "control_frequency_hz": session.control_frequency_hz,
	            "inference_frequency_hz": session.inference_frequency_hz,
	            "last_queue_cleanup": session.last_queue_cleanup,
	            "data_status": {
	                "has_joint_data": session.latest_joint_positions is not None,
	                "images_status": {
	                    camera: camera in session.latest_images
	                    for camera in session.camera_names
	                },
	                "images_updated": session.images_updated.copy(),
	                "joints_updated": session.joints_updated,
	            },
	        }

	    except Exception as e:
	        logger.exception(f"Failed to get queue info for session {session_id}")
	        raise HTTPException(
	            status_code=500, detail=f"Failed to get queue info: {e}"
	        ) from e


	# Script entry point: serve the app with uvicorn when this file is executed directly.
	if __name__ == "__main__":
	    import uvicorn

	    # Use the PORT environment variable when set, otherwise fall back to 8001.
	    port = int(os.environ.get("PORT", 8001))
	    uvicorn.run(
	        "inference_server.main:app",
	        port=port,
	        host="0.0.0.0",
	        log_level="info",
	        reload=False,
	    )