Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -209,7 +209,7 @@ async def root():
|
|
| 209 |
|
| 210 |
# --- 7. FastAPI 路由: /infer4 (極簡版) ---
|
| 211 |
|
| 212 |
-
@app.post("/
|
| 213 |
async def infer4_endpoint(request: InferenceRequestMinimal):
|
| 214 |
FIXED_SYSTEM_MESSAGE = "You are a friendly and concise assistant."
|
| 215 |
FIXED_MAX_TOKENS = 4096
|
|
@@ -239,7 +239,7 @@ async def infer4_endpoint(request: InferenceRequestMinimal):
|
|
| 239 |
|
| 240 |
# --- 8. FastAPI 路由: /infer_amd (使用 Gradio Client) ---
|
| 241 |
|
| 242 |
-
@app.post("/
|
| 243 |
async def infer_amd_endpoint(request: InferenceRequestMinimal):
|
| 244 |
"""
|
| 245 |
使用 gradio_client 呼叫 AMD_SPACE_ID 所指定的 Space 的 /chat API。
|
|
|
|
| 209 |
|
| 210 |
# --- 7. FastAPI 路由: /local/qwen-0-6b (原 /infer4,極簡版) ---
|
| 211 |
|
| 212 |
+
@app.post("/local/qwen-0-6b", summary="執行 LLM 推論 (v4: 極簡輸入/僅回傳 response 欄位)")
|
| 213 |
async def infer4_endpoint(request: InferenceRequestMinimal):
|
| 214 |
FIXED_SYSTEM_MESSAGE = "You are a friendly and concise assistant."
|
| 215 |
FIXED_MAX_TOKENS = 4096
|
|
|
|
| 239 |
|
| 240 |
# --- 8. FastAPI 路由: /remote/amd (原 /infer_amd,使用 Gradio Client) ---
|
| 241 |
|
| 242 |
+
@app.post("/remote/amd", summary="使用 Gradio Client 呼叫外部 AMD LLM Space")
|
| 243 |
async def infer_amd_endpoint(request: InferenceRequestMinimal):
|
| 244 |
"""
|
| 245 |
使用 gradio_client 呼叫 AMD_SPACE_ID 所指定的 Space 的 /chat API。
|