import json
from typing import Optional, Union
from fastapi import APIRouter, Body, HTTPException, Path, Query, Request, Depends, status, Header
from fastapi.responses import StreamingResponse
from app.services import GeminiClient
from app.utils import protect_from_abuse, generate_cache_key, openAI_from_text, log
from app.utils.response import openAI_from_Gemini
from app.utils.auth import custom_verify_password
from .stream_handlers import process_stream_request
from .nonstream_handlers import process_request, process_nonstream_with_keepalive_stream
from app.models.schemas import ChatCompletionRequest, ChatCompletionResponse, ModelList, AIRequest, ChatRequestGemini
import app.config.settings as settings
import asyncio
from app.vertex.routes import chat_api, models_api
from app.vertex.models import OpenAIRequest, OpenAIMessage

# Create the router
router = APIRouter()

# Global references - initialized in main.py and injected into this router via init_router()
key_manager = None
response_cache_manager = None
active_requests_manager = None
safety_settings = None
safety_settings_g2 = None
current_api_key = None
FAKE_STREAMING = None
FAKE_STREAMING_INTERVAL = None
PASSWORD = None
MAX_REQUESTS_PER_MINUTE = None
MAX_REQUESTS_PER_DAY_PER_IP = None

# Initialize the router's globals
def init_router(
    _key_manager,
    _response_cache_manager,
    _active_requests_manager,
    _safety_settings,
    _safety_settings_g2,
    _current_api_key,
    _fake_streaming,
    _fake_streaming_interval,
    _password,
    _max_requests_per_minute,
    _max_requests_per_day_per_ip
):
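    """Wire in the shared singletons created in main.py.

    Called once at startup; the request handlers in this router then read
    these module-level globals instead of constructing their own key
    manager, cache, and request pool.
    """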
    global key_manager, response_cache_manager, active_requests_manager
    global safety_settings, safety_settings_g2, current_api_key
    global FAKE_STREAMING, FAKE_STREAMING_INTERVAL
    global PASSWORD, MAX_REQUESTS_PER_MINUTE, MAX_REQUESTS_PER_DAY_PER_IP
    
    key_manager = _key_manager
    response_cache_manager = _response_cache_manager
    active_requests_manager = _active_requests_manager
    safety_settings = _safety_settings
    safety_settings_g2 = _safety_settings_g2
    current_api_key = _current_api_key
    FAKE_STREAMING = _fake_streaming
    FAKE_STREAMING_INTERVAL = _fake_streaming_interval
    PASSWORD = _password
    MAX_REQUESTS_PER_MINUTE = _max_requests_per_minute
    MAX_REQUESTS_PER_DAY_PER_IP = _max_requests_per_day_per_ip

async def verify_user_agent(request: Request):
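    """Reject requests whose User-Agent is not covered by settings.WHITELIST_USER_AGENT (no-op when unset)."""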
    if not settings.WHITELIST_USER_AGENT:
        return
    if request.headers.get("User-Agent") not in settings.WHITELIST_USER_AGENT:
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Not allowed client")

# TODO: add Gemini support (streaming responses)
async def get_cache(cache_key: str, is_stream: bool, is_gemini: bool = False):
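    """Look up and consume the cached response for cache_key.

    The entry is removed on read (get_and_remove), so each cached
    response is served at most once. Returns None on a cache miss.
    """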
    # Check whether a cached response exists; if so, return it
    cached_response, cache_hit = await response_cache_manager.get_and_remove(cache_key)
    
    if cache_hit and cached_response:
        log('info', f"缓存命中: {cache_key[:8]}...", 
            extra={'request_type': 'non-stream', 'model': cached_response.model})
        
        if is_gemini:
            if is_stream:
                data = f"data: {json.dumps(cached_response.data, ensure_ascii=False)}\n\n"
                # Wrap the SSE string in a one-item iterator so StreamingResponse
                # sends it as a single chunk instead of character by character
                return StreamingResponse(iter([data]), media_type="text/event-stream")
            else:
                return cached_response.data
            
        
        if is_stream:
            # Assumes openAI_from_Gemini(..., stream=True) returns a single
            # SSE-formatted string for the cached response; wrapped for the
            # same single-chunk reason as above
            chunk = openAI_from_Gemini(cached_response, stream=True)
            return StreamingResponse(iter([chunk]), media_type="text/event-stream")
        else:
            return openAI_from_Gemini(cached_response, stream=False)

    return None

@router.get("/aistudio/models",response_model=ModelList)
async def aistudio_list_models(_ = Depends(custom_verify_password),
                               _2 = Depends(verify_user_agent)):
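    """List AI Studio models, filtered by settings.WHITELIST_MODELS or settings.BLOCKED_MODELS."""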
    if settings.WHITELIST_MODELS:
        filtered_models = [model for model in GeminiClient.AVAILABLE_MODELS if model in settings.WHITELIST_MODELS]
    else:
        filtered_models = [model for model in GeminiClient.AVAILABLE_MODELS if model not in settings.BLOCKED_MODELS]
    return ModelList(data=[{"id": model, "object": "model", "created": 1678888888, "owned_by": "organization-owner"} for model in filtered_models])

@router.get("/vertex/models",response_model=ModelList)
async def vertex_list_models(request: Request, 
                             _ = Depends(custom_verify_password),
                             _2 = Depends(verify_user_agent)):
    # Delegate to the vertex/routes/models_api implementation
    return await models_api.list_models(request, current_api_key)

# API routes
@router.get("/v1/models", response_model=ModelList)
@router.get("/models", response_model=ModelList)
async def list_models(request: Request,
                      _ = Depends(custom_verify_password),
                      _2 = Depends(verify_user_agent)):
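    """Serve the model list from Vertex when settings.ENABLE_VERTEX is set, otherwise from AI Studio."""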
    if settings.ENABLE_VERTEX:
        return await vertex_list_models(request, _, _2)
    return await aistudio_list_models(_, _2)

@router.post("/aistudio/chat/completions", response_model=ChatCompletionResponse)
async def aistudio_chat_completions(
    request: Union[ChatCompletionRequest, AIRequest],
    http_request: Request,
    _ = Depends(custom_verify_password),
    _2 = Depends(verify_user_agent),
):
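    """Handle an AI Studio chat completion request.

    Flow: build the cache key -> rate-limit check -> cache lookup ->
    deduplicate against identical in-flight requests (unless PUBLIC_MODE)
    -> dispatch a streaming or non-streaming processing task.
    """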
    is_gemini = getattr(request, 'format_type', None) == "gemini"
    
    # Build the cache key used to match this request against cached responses
    if settings.PRECISE_CACHE:
        cache_key = generate_cache_key(request, is_gemini=is_gemini)
    else:
        cache_key = generate_cache_key(request, last_n_messages=settings.CALCULATE_CACHE_ENTRIES, is_gemini=is_gemini)
    
    # Basic pre-request checks (rate limiting)
    await protect_from_abuse(
        http_request,
        settings.MAX_REQUESTS_PER_MINUTE,
        settings.MAX_REQUESTS_PER_DAY_PER_IP)
    
    if request.model not in GeminiClient.AVAILABLE_MODELS:
        log('error', "Invalid model",
            extra={'model': request.model, 'status_code': 400})
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid model")

    # Log the cache key for this request
    log('info', f"Request cache key: {cache_key[:8]}...",
        extra={'request_type': 'non-stream', 'model': request.model})
    
    # Return the cached response if one already exists for this key
    cached_response = await get_cache(cache_key, is_stream=request.stream, is_gemini=is_gemini)
    if cached_response:
        return cached_response
    
    if not settings.PUBLIC_MODE:
        # Key into the active request pool: one in-flight task per cache key
        pool_key = cache_key

        # Look for an in-flight task already handling an identical request
        active_task = active_requests_manager.get(pool_key)
        if active_task and not active_task.done():
            log('info', "Found an in-flight task for an identical request",
                extra={'request_type': 'stream' if request.stream else 'non-stream', 'model': request.model})

            # Wait for the existing task to finish
            try:
                # Bound the wait so we never block indefinitely
                await asyncio.wait_for(active_task, timeout=240)

                # Reuse the existing task's result
                if active_task.done() and not active_task.cancelled():
                    result = active_task.result()
                    active_requests_manager.remove(pool_key)
                    if result:
                        return result

            except (asyncio.TimeoutError, asyncio.CancelledError) as e:
                # On timeout or cancellation, log it and fall through to issue a fresh request
                error_type = "timed out" if isinstance(e, asyncio.TimeoutError) else "was cancelled"
                log('warning', f"Existing task {error_type} while being awaited: {pool_key}",
                    extra={'request_type': 'non-stream', 'model': request.model})

                # Remove the dead task from the active request pool
                if active_task.done() or active_task.cancelled():
                    active_requests_manager.remove(pool_key)
                    log('info', f"Removed the task that {error_type} from the active request pool: {pool_key}",
                        extra={'request_type': 'non-stream'})

    if request.stream:
        # Streaming request: run the handler as a background task
        process_task = asyncio.create_task(
            process_stream_request(
                chat_request=request,
                key_manager=key_manager,
                response_cache_manager=response_cache_manager,
                safety_settings=safety_settings,
                safety_settings_g2=safety_settings_g2,
                cache_key=cache_key
            )
        )
    else:
        # Non-streaming request: optionally keep the connection alive while processing
        if settings.NONSTREAM_KEEPALIVE_ENABLED:
            process_task = asyncio.create_task(
                process_nonstream_with_keepalive_stream(
                    chat_request=request,
                    key_manager=key_manager,
                    response_cache_manager=response_cache_manager,
                    safety_settings=safety_settings,
                    safety_settings_g2=safety_settings_g2,
                    cache_key=cache_key,
                    is_gemini=is_gemini
                )
            )
        else:
            process_task = asyncio.create_task(
                process_request(
                    chat_request=request,
                    key_manager=key_manager,
                    response_cache_manager=response_cache_manager,
                    safety_settings=safety_settings,
                    safety_settings_g2=safety_settings_g2,
                    cache_key=cache_key
                )
            )

    if not settings.PUBLIC_MODE:
        # Register the task in the active request pool
        active_requests_manager.add(pool_key, process_task)

    # Wait for the task to complete
    try:
        response = await process_task
        if not settings.PUBLIC_MODE:
            active_requests_manager.remove(pool_key)

        return response
    except Exception as e:
        if not settings.PUBLIC_MODE:
            # Remove the failed task from the active request pool
            active_requests_manager.remove(pool_key)

        # Another task may have produced a cached result in the meantime; check again
        cached_response = await get_cache(cache_key, is_stream=request.stream, is_gemini=is_gemini)
        if cached_response:
            return cached_response

        # Surface the error to the client
        raise HTTPException(status_code=500, detail=f"hajimi: internal error while processing the request\nReason: {e}")

@router.post("/vertex/chat/completions", response_model=ChatCompletionResponse)
async def vertex_chat_completions(
    request: ChatCompletionRequest, 
    http_request: Request,
    _dp = Depends(custom_verify_password),
    _du = Depends(verify_user_agent),
    ):
    # Delegate to the vertex/routes/chat_api implementation

    # Convert the message format
    openai_messages = []
    for message in request.messages:
        openai_messages.append(OpenAIMessage(
            role=message.get('role', ''),
            content=message.get('content', '')
        ))
    
    # Convert the request format
    vertex_request = OpenAIRequest(
        model=request.model,
        messages=openai_messages,
        temperature=request.temperature,
        max_tokens=request.max_tokens,
        top_p=request.top_p,
        top_k=request.top_k,
        stream=request.stream,
        stop=request.stop,
        presence_penalty=request.presence_penalty,
        frequency_penalty=request.frequency_penalty,
        seed=getattr(request, 'seed', None),
        logprobs=getattr(request, 'logprobs', None),
        response_logprobs=getattr(request, 'response_logprobs', None),
        n=request.n
    )
    
    # Call into the vertex/routes/chat_api implementation
    return await chat_api.chat_completions(http_request, vertex_request, current_api_key)

@router.post("/v1/chat/completions", response_model=ChatCompletionResponse)
@router.post("/chat/completions", response_model=ChatCompletionResponse)
async def chat_completions(
    request: ChatCompletionRequest,
    http_request: Request,
    _dp = Depends(custom_verify_password),
    _du = Depends(verify_user_agent),
):
    """处理API请求的主函数,根据需要处理流式或非流式请求"""
    if settings.ENABLE_VERTEX:
        return await vertex_chat_completions(request, http_request, _dp, _du)
    return await aistudio_chat_completions(request, http_request, _dp, _du)

@router.post("/gemini/{api_version:str}/models/{model_and_responseType:path}")
async def gemini_chat_completions(
    request: Request,
    model_and_responseType: str = Path(...),
    key: Optional[str] = Query(None),
    alt: Optional[str] = Query(None, description="sse or None"),
    payload: ChatRequestGemini = Body(...),
    _dp = Depends(custom_verify_password),
    _du = Depends(verify_user_agent),
):
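    """Handle requests in the native Gemini API shape.

    The final path segment encodes both model and action, e.g.
    "gemini-2.0-flash:streamGenerateContent" (model name illustrative);
    the "streamGenerateContent" action selects streaming. The `alt` query
    parameter is accepted for compatibility, but streaming is decided by
    the action alone. The request is adapted into an AIRequest and
    funneled through aistudio_chat_completions.
    """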
    # Parse the path parameters: "<model>:<action>"
    try:
        model_name, action_type = model_and_responseType.split(":", 1)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid request path")
    is_stream = action_type == "streamGenerateContent"

    geminiRequest = AIRequest(payload=payload, model=model_name, stream=is_stream, format_type='gemini')
    return await aistudio_chat_completions(geminiRequest, request, _dp, _du)