import json
from typing import Optional, Union
from fastapi import APIRouter, Body, HTTPException, Path, Query, Request, Depends, status, Header
from fastapi.responses import StreamingResponse
from app.services import GeminiClient
from app.utils import protect_from_abuse, generate_cache_key, openAI_from_text, log
from app.utils.response import openAI_from_Gemini
from app.utils.auth import custom_verify_password
from .stream_handlers import process_stream_request
from .nonstream_handlers import process_request, process_nonstream_with_keepalive_stream
from app.models.schemas import ChatCompletionRequest, ChatCompletionResponse, ModelList, AIRequest, ChatRequestGemini
import app.config.settings as settings
import asyncio
from app.vertex.routes import chat_api, models_api
from app.vertex.models import OpenAIRequest, OpenAIMessage
# Create the router
router = APIRouter()
# Global variable references - these are initialized in main.py and passed into the routes
key_manager = None
response_cache_manager = None
active_requests_manager = None
safety_settings = None
safety_settings_g2 = None
current_api_key = None
FAKE_STREAMING = None
FAKE_STREAMING_INTERVAL = None
PASSWORD = None
MAX_REQUESTS_PER_MINUTE = None
MAX_REQUESTS_PER_DAY_PER_IP = None
# Router initialization function
def init_router(
    _key_manager,
    _response_cache_manager,
    _active_requests_manager,
    _safety_settings,
    _safety_settings_g2,
    _current_api_key,
    _fake_streaming,
    _fake_streaming_interval,
    _password,
    _max_requests_per_minute,
    _max_requests_per_day_per_ip
):
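    """Bind the shared managers and settings created in main.py to this module's globals."""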
    global key_manager, response_cache_manager, active_requests_manager
    global safety_settings, safety_settings_g2, current_api_key
    global FAKE_STREAMING, FAKE_STREAMING_INTERVAL
    global PASSWORD, MAX_REQUESTS_PER_MINUTE, MAX_REQUESTS_PER_DAY_PER_IP
    key_manager = _key_manager
    response_cache_manager = _response_cache_manager
    active_requests_manager = _active_requests_manager
    safety_settings = _safety_settings
    safety_settings_g2 = _safety_settings_g2
    current_api_key = _current_api_key
    FAKE_STREAMING = _fake_streaming
    FAKE_STREAMING_INTERVAL = _fake_streaming_interval
    PASSWORD = _password
    MAX_REQUESTS_PER_MINUTE = _max_requests_per_minute
    MAX_REQUESTS_PER_DAY_PER_IP = _max_requests_per_day_per_ip

async def verify_user_agent(request: Request):
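    """Reject the request when a User-Agent whitelist is configured and the client's User-Agent is not on it."""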
    if not settings.WHITELIST_USER_AGENT:
        return
    if request.headers.get("User-Agent") not in settings.WHITELIST_USER_AGENT:
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Not allowed client")

# TODO: add Gemini support (streaming responses)
async def get_cache(cache_key, is_stream: bool, is_gemini=False):
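    """Look up cache_key and, on a hit, return the cached response in Gemini or OpenAI format, streaming or not; return None on a miss."""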
    # Check whether a cached response exists; if so, return it
    cached_response, cache_hit = await response_cache_manager.get_and_remove(cache_key)
    if cache_hit and cached_response:
        log('info', f"Cache hit: {cache_key[:8]}...",
            extra={'request_type': 'non-stream', 'model': cached_response.model})
        if is_gemini:
            if is_stream:
                data = f"data: {json.dumps(cached_response.data, ensure_ascii=False)}\n\n"
                return StreamingResponse(data, media_type="text/event-stream")
            else:
                return cached_response.data
        if is_stream:
            chunk = openAI_from_Gemini(cached_response, stream=True)
            return StreamingResponse(chunk, media_type="text/event-stream")
        else:
            return openAI_from_Gemini(cached_response, stream=False)
    return None

@router.get("/aistudio/models",response_model=ModelList)
async def aistudio_list_models(_ = Depends(custom_verify_password),
_2 = Depends(verify_user_agent)):
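    """List available AI Studio models, restricted to the whitelist when one is configured, otherwise excluding blocked models."""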
    if settings.WHITELIST_MODELS:
        filtered_models = [model for model in GeminiClient.AVAILABLE_MODELS if model in settings.WHITELIST_MODELS]
    else:
        filtered_models = [model for model in GeminiClient.AVAILABLE_MODELS if model not in settings.BLOCKED_MODELS]
    return ModelList(data=[{"id": model, "object": "model", "created": 1678888888, "owned_by": "organization-owner"} for model in filtered_models])

@router.get("/vertex/models",response_model=ModelList)
async def vertex_list_models(request: Request,
_ = Depends(custom_verify_password),
_2 = Depends(verify_user_agent)):
# 使用vertex/routes/models_api的实现
return await models_api.list_models(request, current_api_key)
# API routes
@router.get("/v1/models",response_model=ModelList)
@router.get("/models",response_model=ModelList)
async def list_models(request: Request,
_ = Depends(custom_verify_password),
_2 = Depends(verify_user_agent)):
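    """List models, delegating to the Vertex endpoint when ENABLE_VERTEX is set and to the AI Studio endpoint otherwise."""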
    if settings.ENABLE_VERTEX:
        return await vertex_list_models(request, _, _2)
    return await aistudio_list_models(_, _2)

@router.post("/aistudio/chat/completions", response_model=ChatCompletionResponse)
async def aistudio_chat_completions(
    request: Union[ChatCompletionRequest, AIRequest],
    http_request: Request,
    _ = Depends(custom_verify_password),
    _2 = Depends(verify_user_agent),
):
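    """Handle an AI Studio chat completion: check the response cache, deduplicate against in-flight requests, then dispatch to the stream or non-stream handler."""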
    format_type = getattr(request, 'format_type', None)
    is_gemini = format_type == "gemini"
    # Build the cache key used to match this request's content against cached responses
    if settings.PRECISE_CACHE:
        cache_key = generate_cache_key(request, is_gemini=is_gemini)
    else:
        cache_key = generate_cache_key(request, last_n_messages=settings.CALCULATE_CACHE_ENTRIES, is_gemini=is_gemini)
    # Basic pre-request checks
    await protect_from_abuse(
        http_request,
        settings.MAX_REQUESTS_PER_MINUTE,
        settings.MAX_REQUESTS_PER_DAY_PER_IP)
    if request.model not in GeminiClient.AVAILABLE_MODELS:
        log('error', "Invalid model",
            extra={'model': request.model, 'status_code': 400})
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid model")
    # Log the request's cache key
    log('info', f"Request cache key: {cache_key[:8]}...",
        extra={'request_type': 'non-stream', 'model': request.model})
    # Check whether a cached response exists; if so, return it
    cached_response = await get_cache(cache_key, is_stream=request.stream, is_gemini=is_gemini)
    if cached_response:
        return cached_response
    if not settings.PUBLIC_MODE:
        # Build the active-request-pool key from the cache key
        pool_key = f"{cache_key}"
        # Look for an active task that uses the same cache key
        active_task = active_requests_manager.get(pool_key)
        if active_task and not active_task.done():
            log('info', "Found an in-progress task for the same request",
                extra={'request_type': 'stream' if request.stream else "non-stream", 'model': request.model})
            # Wait for the existing task to finish
            try:
                # Apply a timeout to avoid waiting forever
                await asyncio.wait_for(active_task, timeout=240)
                # Use the task's result
                if active_task.done() and not active_task.cancelled():
                    result = active_task.result()
                    active_requests_manager.remove(pool_key)
                    if result:
                        return result
            except (asyncio.TimeoutError, asyncio.CancelledError) as e:
                # If the task timed out or was cancelled, log it and let processing continue below
                error_type = "timed out" if isinstance(e, asyncio.TimeoutError) else "cancelled"
                log('warning', f"Waiting for existing task {error_type}: {pool_key}",
                    extra={'request_type': 'non-stream', 'model': request.model})
                # Remove the task from the active request pool
                if active_task.done() or active_task.cancelled():
                    active_requests_manager.remove(pool_key)
                    log('info', f"Removed {error_type} task from the active request pool: {pool_key}",
                        extra={'request_type': 'non-stream'})
    if request.stream:
        # Task that handles the streaming request
        process_task = asyncio.create_task(
            process_stream_request(
                chat_request=request,
                key_manager=key_manager,
                response_cache_manager=response_cache_manager,
                safety_settings=safety_settings,
                safety_settings_g2=safety_settings_g2,
                cache_key=cache_key
            )
        )
    else:
        # Check whether non-stream keepalive is enabled
        if settings.NONSTREAM_KEEPALIVE_ENABLED:
            # Handle the non-stream request with keepalive
            process_task = asyncio.create_task(
                process_nonstream_with_keepalive_stream(
                    chat_request=request,
                    key_manager=key_manager,
                    response_cache_manager=response_cache_manager,
                    safety_settings=safety_settings,
                    safety_settings_g2=safety_settings_g2,
                    cache_key=cache_key,
                    is_gemini=is_gemini
                )
            )
        else:
            # Task that handles the plain non-stream request
            process_task = asyncio.create_task(
                process_request(
                    chat_request=request,
                    key_manager=key_manager,
                    response_cache_manager=response_cache_manager,
                    safety_settings=safety_settings,
                    safety_settings_g2=safety_settings_g2,
                    cache_key=cache_key
                )
            )
    if not settings.PUBLIC_MODE:
        # Add the task to the active request pool
        active_requests_manager.add(pool_key, process_task)
    # Wait for the task to finish
    try:
        response = await process_task
        if not settings.PUBLIC_MODE:
            active_requests_manager.remove(pool_key)
        return response
    except Exception as e:
        if not settings.PUBLIC_MODE:
            # If the task failed, remove it from the active request pool
            active_requests_manager.remove(pool_key)
        # Check whether a cached result already exists (it may have been produced by another task)
        cached_response = await get_cache(cache_key, is_stream=request.stream, is_gemini=is_gemini)
        if cached_response:
            return cached_response
        # Report the error to the client
        raise HTTPException(status_code=500, detail=f"hajimi encountered an internal error while processing the request\nReason: {e}")

@router.post("/vertex/chat/completions", response_model=ChatCompletionResponse)
async def vertex_chat_completions(
    request: ChatCompletionRequest,
    http_request: Request,
    _dp = Depends(custom_verify_password),
    _du = Depends(verify_user_agent),
):
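    """Handle a chat completion by converting the request to the Vertex OpenAIRequest format and delegating to vertex/routes/chat_api."""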
    # Delegate to the implementation in vertex/routes/chat_api
    # Convert the message format
    openai_messages = []
    for message in request.messages:
        openai_messages.append(OpenAIMessage(
            role=message.get('role', ''),
            content=message.get('content', '')
        ))
    # Convert the request format
    vertex_request = OpenAIRequest(
        model=request.model,
        messages=openai_messages,
        temperature=request.temperature,
        max_tokens=request.max_tokens,
        top_p=request.top_p,
        top_k=request.top_k,
        stream=request.stream,
        stop=request.stop,
        presence_penalty=request.presence_penalty,
        frequency_penalty=request.frequency_penalty,
        seed=getattr(request, 'seed', None),
        logprobs=getattr(request, 'logprobs', None),
        response_logprobs=getattr(request, 'response_logprobs', None),
        n=request.n
    )
    # Call the vertex/routes/chat_api implementation
    return await chat_api.chat_completions(http_request, vertex_request, current_api_key)

@router.post("/v1/chat/completions", response_model=ChatCompletionResponse)
@router.post("/chat/completions", response_model=ChatCompletionResponse)
async def chat_completions(
    request: ChatCompletionRequest,
    http_request: Request,
    _dp = Depends(custom_verify_password),
    _du = Depends(verify_user_agent),
):
    """Main entry point for API requests; dispatches to streaming or non-streaming handling as needed."""
    if settings.ENABLE_VERTEX:
        return await vertex_chat_completions(request, http_request, _dp, _du)
    return await aistudio_chat_completions(request, http_request, _dp, _du)

@router.post("/gemini/{api_version:str}/models/{model_and_responseType:path}")
async def gemini_chat_completions(
    request: Request,
    model_and_responseType: str = Path(...),
    key: Optional[str] = Query(None),
    alt: Optional[str] = Query(None, description="sse or None"),
    payload: ChatRequestGemini = Body(...),
    _dp = Depends(custom_verify_password),
    _du = Depends(verify_user_agent),
):
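    """Handle a native Gemini-style request by parsing the model and action from the path and forwarding it to aistudio_chat_completions."""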
    # Extract the path parameters
    is_stream = False
    try:
        model_name, action_type = model_and_responseType.split(":", 1)
        if action_type == "streamGenerateContent":
            is_stream = True
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid request path")
    geminiRequest = AIRequest(payload=payload, model=model_name, stream=is_stream, format_type='gemini')
    return await aistudio_chat_completions(geminiRequest, request, _dp, _du)